linux-fsdevel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC/PATCH] revoke/frevoke system calls V2
@ 2006-07-27 14:25 Pekka J Enberg
  2006-07-27 15:07 ` Alan Cox
                   ` (3 more replies)
  0 siblings, 4 replies; 51+ messages in thread
From: Pekka J Enberg @ 2006-07-27 14:25 UTC (permalink / raw)
  To: linux-kernel; +Cc: linux-fsdevel, akpm, viro, alan, tytso, tigran

From: Pekka Enberg <penberg@cs.helsinki.fi>

This patch implements the revoke(2) and frevoke(2) system calls for
all types of files. The operation is done in passes: first we replace
all pointers to the file with NULL in fd tables, then in a second pass,
we take down shared mappings, sync the file to ensure no I/O operations 
are in-flight, and finally close the file. If mmap takedown or sync fails,
we restore the fds to point to the file.

This patch addresses two complaints from Andrew Morton: no kmalloc
under tasklist_lock and keep fget_light/fput_light locking in sys_read
and sys_write. To ensure do_revoke does not race with users of
fget_light/fput_light, we delay closing of the files until fput_light
is called. These bits were taken from the forced unmount patch by
Tigran Aivazian.

There are two known remaining issues: if someone expands the fd
tables, we will BUG_ON. Edgar Toerning expressed concers over allowing
any user to remove mappings from another process and letting it
crash. Albert Cahalan suggested either converting the shared mapping
to private or substitute the unmapped pages with zeroed pages.

Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
---

 arch/i386/kernel/syscall_table.S |    2 
 fs/Makefile                      |    2 
 fs/file_table.c                  |    1 
 fs/revoke.c                      |  315 +++++++++++++++++++++++++++++++++++++++
 include/asm-i386/unistd.h        |    4 
 include/linux/file.h             |   14 +
 include/linux/fs.h               |    2 
 include/linux/syscalls.h         |    3 
 8 files changed, 341 insertions(+), 2 deletions(-)

Index: 2.6/arch/i386/kernel/syscall_table.S
===================================================================
--- 2.6.orig/arch/i386/kernel/syscall_table.S
+++ 2.6/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,5 @@ ENTRY(sys_call_table)
 	.long sys_tee			/* 315 */
 	.long sys_vmsplice
 	.long sys_move_pages
+	.long sys_revoke
+	.long sys_frevoke
Index: 2.6/fs/Makefile
===================================================================
--- 2.6.orig/fs/Makefile
+++ 2.6/fs/Makefile
@@ -10,7 +10,7 @@ obj-y :=	open.o read_write.o file_table.
 		ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \
 		attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \
 		seq_file.o xattr.o libfs.o fs-writeback.o mpage.o direct-io.o \
-		ioprio.o pnode.o drop_caches.o splice.o sync.o
+		ioprio.o pnode.o drop_caches.o splice.o sync.o revoke.o
 
 obj-$(CONFIG_INOTIFY)		+= inotify.o
 obj-$(CONFIG_INOTIFY_USER)	+= inotify_user.o
Index: 2.6/fs/revoke.c
===================================================================
--- /dev/null
+++ 2.6/fs/revoke.c
@@ -0,0 +1,315 @@
+/*
+ * fs/revoke.c - Invalidate all current open file descriptors of an inode.
+ *
+ * Copyright (C) 2006 Pekka Enberg
+ *
+ * This file is released under the GPLv2.
+ */
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/namei.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+
+/*
+ * Auxiliary struct for keeping track of revoked files.
+ */
+struct revoked_file {
+	unsigned int fd;
+	struct file *file;
+	struct task_struct *owner;
+};
+
+/*
+ * 	LOCKING: task_lock(owner)
+ */
+static unsigned long revoke_fds(struct task_struct *owner,
+				struct inode *inode,
+				struct file *exclude,
+				struct revoked_file *to_close,
+				unsigned long nr_fds,
+				unsigned long max_fds)
+{
+	unsigned long offset;
+	struct files_struct *files;
+	struct fdtable *fdt;
+	unsigned int fd;
+
+	files = get_files_struct(owner);
+	if (!files)
+		return 0;
+
+	offset = nr_fds;
+
+	spin_lock(&files->file_lock);
+	fdt = files_fdtable(files);
+	for (fd = 0; fd < fdt->max_fds; fd++) {
+		struct file *file;
+		struct revoked_file *revoked;
+
+		file = fcheck_files(files, fd);
+		if (!file)
+			continue;
+
+		if (file == exclude)
+			continue;
+
+		if (file->f_dentry->d_inode != inode)
+			continue;
+
+		/*
+		 * Leak the fd so it is not reused. After this point, we don't
+		 * need to worry about racing with sys_close or sys_dup.
+		 */
+		rcu_assign_pointer(fdt->fd[fd], NULL);
+		FD_CLR(fd, fdt->close_on_exec);
+
+		/*
+		 * Hold on to task until we can take down the file and its
+		 * mmap.
+		 */
+		get_task_struct(owner);
+
+		BUG_ON(offset >= max_fds);
+		revoked = &to_close[offset++];
+		revoked->fd    = fd;
+		revoked->file  = file;
+		revoked->owner = owner;
+	}
+	spin_unlock(&files->file_lock);
+	put_files_struct(files);
+	return offset;
+}
+
+static int revoke_mmap(struct revoked_file *revoked)
+{
+	int err = 0;
+	struct mm_struct *mm;
+	struct vm_area_struct *this, *next;
+
+	mm = get_task_mm(revoked->owner);
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Be careful, do_munmap removes the unmapped vma from mm->mmap list.
+	 */
+	this = mm->mmap;
+	while (this) {
+		next = this->vm_next;
+		if (this->vm_flags & VM_SHARED && this->vm_file == revoked->file) {
+			err = do_munmap(mm, this->vm_start,
+					this->vm_end - this->vm_start);
+			if (err)
+				break;
+		}
+		this = next;
+	}
+	up_write(&mm->mmap_sem);
+	mmput(mm);
+	return err;
+}
+
+static int close_files(struct revoked_file *revoked)
+{
+	int err = 0;
+	struct files_struct *files;
+
+	files = get_files_struct(revoked->owner);
+	if (files) {
+		err = filp_close(revoked->file, files);
+		put_files_struct(files);
+	}
+	return err;
+}
+
+static void restore_files(struct revoked_file *to_restore, unsigned long nr_fds)
+{
+	unsigned long i;
+
+	for (i = 0; i < nr_fds; i++) {
+		struct revoked_file *this;
+		struct files_struct *files;
+
+		this = &to_restore[i];
+		if (!this)
+			continue;
+
+		files = get_files_struct(this->owner);
+		if (files) {
+			struct fdtable *fdt;
+
+			spin_lock(&files->file_lock);
+			fdt = files_fdtable(files);
+			rcu_assign_pointer(fdt->fd[this->fd], this->file);
+			FD_SET(this->fd, fdt->close_on_exec);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+		}
+
+		put_task_struct(this->owner);
+	}
+}
+
+static int cleanup_files(struct revoked_file *to_cleanup, unsigned long nr_fds)
+{
+	int err = 0;
+	unsigned long i;
+
+	for (i = 0; i < nr_fds; i++) {
+		struct revoked_file *this;
+
+		this = &to_cleanup[i];
+
+		err = revoke_mmap(this);
+		if (err)
+			break;
+
+		err = do_fsync(this->file, 1);
+		if (err)
+			break;
+
+		err = close_files(this);
+
+		put_task_struct(this->owner);
+		if (err)
+			break;
+	}
+	if (err)
+		restore_files(&to_cleanup[i], nr_fds-i);
+
+	return err;
+}
+
+/*
+ *	Returns the maximum number of fds pointing to inode.
+ *
+ *	LOCKING: read_lock(&tasklist_lock)
+ */
+static unsigned long inode_fds(struct inode *inode, struct file *exclude)
+{
+	struct task_struct *g, *p;
+	unsigned long nr_fds = 0;
+
+	do_each_thread(g, p) {
+		struct files_struct *files;
+		struct fdtable *fdt;
+		unsigned int fd;
+
+		files = get_files_struct(p);
+		if (!files)
+			continue;
+
+		spin_lock(&files->file_lock);
+		fdt = files_fdtable(files);
+		for (fd = 0; fd < fdt->max_fds; fd++) {
+			struct file *file;
+
+			file = fcheck_files(files, fd);
+			if (file && file != exclude &&
+			    file->f_dentry->d_inode == inode) {
+				/*
+				 * FIXME: If someone expands fd table, we can overflow.
+				 */
+				nr_fds += fdt->max_fds;
+				break;
+			}
+		}
+		spin_unlock(&files->file_lock);
+		put_files_struct(files);
+	} while_each_thread(g, p);
+	return nr_fds;
+}
+
+/*
+ *	Only allocate memory for those threads that actually have an fd
+ *	pointing to the inode.
+ */
+static struct revoked_file *alloc_revoke_table(struct inode *inode,
+					       struct file *exclude,
+					       unsigned long *nr_fds)
+{
+	read_lock(&tasklist_lock);
+	*nr_fds = inode_fds(inode, exclude);
+	read_unlock(&tasklist_lock);
+
+	return kcalloc(*nr_fds, sizeof(struct revoked_file), GFP_KERNEL);
+}
+
+static int do_revoke(struct inode *inode, struct file *exclude)
+{
+	int err = 0;
+	unsigned long nr_fds, max_fds;
+	struct revoked_file *to_close = NULL;
+	struct task_struct *g, *p;
+
+	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
+		err = -EPERM;
+		goto out;
+	}
+
+  retry:
+	if (signal_pending(current)) {
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	to_close = alloc_revoke_table(inode, exclude, &max_fds);
+	if (!to_close) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	read_lock(&tasklist_lock);
+
+	/*
+	 * If someone forked while we were allocating memory, try again.
+	 */
+	if (inode_fds(inode, exclude) > max_fds) {
+		read_unlock(&tasklist_lock);
+		goto retry;
+	}
+
+	/*
+	 * First revoke the fds. After we are done, no one can start new
+	 * operations on them.
+	 */
+	nr_fds = 0;
+	do_each_thread(g, p) {
+		nr_fds = revoke_fds(p, inode, exclude, to_close,
+				    nr_fds, max_fds);
+	} while_each_thread(g, p);
+	read_unlock(&tasklist_lock);
+
+	/*
+	 * Now, take down the mmaps and close the files for good.
+	 */
+	err = cleanup_files(to_close, nr_fds);
+  out:
+	kfree(to_close);
+	return err;
+}
+
+asmlinkage int sys_revoke(const char __user *filename)
+{
+	int err;
+	struct nameidata nd;
+
+	err = __user_walk(filename, 0, &nd);
+	if (!err) {
+		err = do_revoke(nd.dentry->d_inode, NULL);
+		path_release(&nd);
+	}
+	return err;
+}
+
+asmlinkage int sys_frevoke(unsigned int fd)
+{
+	struct file *file = fget(fd);
+	int err = -EBADF;
+
+	if (file) {
+		err = do_revoke(file->f_dentry->d_inode, file);
+		fput(file);
+	}
+	return err;
+}
Index: 2.6/include/asm-i386/unistd.h
===================================================================
--- 2.6.orig/include/asm-i386/unistd.h
+++ 2.6/include/asm-i386/unistd.h
@@ -323,10 +323,12 @@
 #define __NR_tee		315
 #define __NR_vmsplice		316
 #define __NR_move_pages		317
+#define __NR_revoke		318
+#define __NR_frevoke		319
 
 #ifdef __KERNEL__
 
-#define NR_syscalls 318
+#define NR_syscalls 320
 
 /*
  * user-visible error numbers are in the range -1 - -128: see
Index: 2.6/include/linux/syscalls.h
===================================================================
--- 2.6.orig/include/linux/syscalls.h
+++ 2.6/include/linux/syscalls.h
@@ -597,4 +597,7 @@ asmlinkage long sys_get_robust_list(int 
 asmlinkage long sys_set_robust_list(struct robust_list_head __user *head,
 				    size_t len);
 
+asmlinkage int sys_revoke(const char __user *filename);
+asmlinkage int sys_frevoke(unsigned int fd);
+
 #endif
Index: 2.6/fs/file_table.c
===================================================================
--- 2.6.orig/fs/file_table.c
+++ 2.6/fs/file_table.c
@@ -218,6 +218,7 @@ struct file fastcall *fget_light(unsigne
 	*fput_needed = 0;
 	if (likely((atomic_read(&files->count) == 1))) {
 		file = fcheck_files(files, fd);
+		set_f_light(file);
 	} else {
 		rcu_read_lock();
 		file = fcheck_files(files, fd);
Index: 2.6/include/linux/file.h
===================================================================
--- 2.6.orig/include/linux/file.h
+++ 2.6/include/linux/file.h
@@ -6,6 +6,7 @@
 #define __LINUX_FILE_H
 
 #include <asm/atomic.h>
+#include <linux/fs.h>
 #include <linux/posix_types.h>
 #include <linux/compiler.h>
 #include <linux/spinlock.h>
@@ -67,10 +68,23 @@ struct files_struct {
 extern void FASTCALL(__fput(struct file *));
 extern void FASTCALL(fput(struct file *));
 
+static inline void clear_f_light(struct file *file)
+{
+	file->f_light = 0;
+}
+
+static inline void set_f_light(struct file *file)
+{
+	if (file)
+		file->f_light = 1;
+}
+
 static inline void fput_light(struct file *file, int fput_needed)
 {
 	if (unlikely(fput_needed))
 		fput(file);
+	else
+		clear_f_light(file);
 }
 
 extern struct file * FASTCALL(fget(unsigned int fd));
Index: 2.6/include/linux/fs.h
===================================================================
--- 2.6.orig/include/linux/fs.h
+++ 2.6/include/linux/fs.h
@@ -698,6 +698,8 @@ struct file {
 	struct list_head	f_ep_links;
 	spinlock_t		f_ep_lock;
 #endif /* #ifdef CONFIG_EPOLL */
+	/* This instance is being used without holding a reference. */
+	int			f_light;
 	struct address_space	*f_mapping;
 };
 extern spinlock_t files_lock;

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 14:25 [RFC/PATCH] revoke/frevoke system calls V2 Pekka J Enberg
@ 2006-07-27 15:07 ` Alan Cox
  2006-07-27 15:33   ` Pekka Enberg
  2006-07-27 16:41 ` Ulrich Drepper
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-07-27 15:07 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

Ar Iau, 2006-07-27 am 17:25 +0300, ysgrifennodd Pekka J Enberg:
> There are two known remaining issues: if someone expands the fd
> tables, we will BUG_ON. Edgar Toerning expressed concers over allowing
> any user to remove mappings from another process and letting it
> crash. Albert Cahalan suggested either converting the shared mapping
> to private or substitute the unmapped pages with zeroed pages.

That should be three I think. frevoke and revoke should not return until
all the existing outstanding is dead. For devices that means we need to
wake up the device where possible and really suggests we need a device
->revoke method. TTY devices need this to allow us to re-implement
vhangup in terms of revoke. Other devices devices are not all
sufficiently secure without this check. Some may also want to use this
hook to ensure that any security context is dead (eg cached crypto
keys).

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 15:07 ` Alan Cox
@ 2006-07-27 15:33   ` Pekka Enberg
  2006-07-27 16:09     ` Alan Cox
  0 siblings, 1 reply; 51+ messages in thread
From: Pekka Enberg @ 2006-07-27 15:33 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

On 7/27/06, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> That should be three I think. frevoke and revoke should not return until
> all the existing outstanding is dead. For devices that means we need to
> wake up the device where possible and really suggests we need a device
> ->revoke method. TTY devices need this to allow us to re-implement
> vhangup in terms of revoke. Other devices devices are not all
> sufficiently secure without this check. Some may also want to use this
> hook to ensure that any security context is dead (eg cached crypto
> keys).

Don't device drivers already do that for f_ops->flush (filp_close) and
vm_ops->close (munmap)? What revoke and frevoke do is basically
unmap/fsync/close on all the open file descriptors.

                                    Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 16:09     ` Alan Cox
@ 2006-07-27 16:01       ` Pekka J Enberg
  2006-07-27 16:30         ` Alan Cox
  0 siblings, 1 reply; 51+ messages in thread
From: Pekka J Enberg @ 2006-07-27 16:01 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

Ar Iau, 2006-07-27 am 18:33 +0300, ysgrifennodd Pekka Enberg:
> > Don't device drivers already do that for f_ops->flush (filp_close) and

On Thu, 27 Jul 2006, Alan Cox wrote:
> ->flush is called when each closing occurs.

Yes revoke calls it too, but is that sufficient, or do we need ->revoke?

Ar Iau, 2006-07-27 am 18:33 +0300, ysgrifennodd Pekka Enberg:
> > vm_ops->close (munmap)? What revoke and frevoke do is basically
> > unmap/fsync/close on all the open file descriptors.

On Thu, 27 Jul 2006, Alan Cox wrote:
> What happens if an app is already blocked on a read when you do a
> revoke ? The nasty case answer could be "it completes later on and
> returns the users captured password"

Ouch. You are right. I need to stick that invalidate_inode_pages2 
back in there. The do_fsync call takes care of writes only, obviously. 
Thanks!

				Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 15:33   ` Pekka Enberg
@ 2006-07-27 16:09     ` Alan Cox
  2006-07-27 16:01       ` Pekka J Enberg
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-07-27 16:09 UTC (permalink / raw)
  To: Pekka Enberg; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

Ar Iau, 2006-07-27 am 18:33 +0300, ysgrifennodd Pekka Enberg:
> Don't device drivers already do that for f_ops->flush (filp_close) and

->flush is called when each closing occurs.

> vm_ops->close (munmap)? What revoke and frevoke do is basically
> unmap/fsync/close on all the open file descriptors.

What happens if an app is already blocked on a read when you do a
revoke ? The nasty case answer could be "it completes later on and
returns the users captured password"

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 16:01       ` Pekka J Enberg
@ 2006-07-27 16:30         ` Alan Cox
  2006-07-27 17:07           ` Pekka J Enberg
  2006-07-27 18:27           ` Pekka Enberg
  0 siblings, 2 replies; 51+ messages in thread
From: Alan Cox @ 2006-07-27 16:30 UTC (permalink / raw)
  To: Pekka J Enberg; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

Ar Iau, 2006-07-27 am 19:01 +0300, ysgrifennodd Pekka J Enberg:
> Yes revoke calls it too, but is that sufficient, or do we need ->revoke?
> Ouch. You are right. I need to stick that invalidate_inode_pages2 
> back in there. The do_fsync call takes care of writes only, obviously. 

Actually that isn't true either - it takes care of *regular file*
writes. Devices will need a revoke hook and thats really probably only
right. If they don't have one just -EOPNOTSUPP, you can check it before
you begin any other processing so its easy to check.

Alan

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 14:25 [RFC/PATCH] revoke/frevoke system calls V2 Pekka J Enberg
  2006-07-27 15:07 ` Alan Cox
@ 2006-07-27 16:41 ` Ulrich Drepper
  2006-07-27 17:05   ` Pekka J Enberg
  2006-07-27 18:06 ` Petr Baudis
  2006-08-05 12:29 ` Pavel Machek
  3 siblings, 1 reply; 51+ messages in thread
From: Ulrich Drepper @ 2006-07-27 16:41 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

On 7/27/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> +asmlinkage int sys_revoke(const char __user *filename)

Could we just plainly avoid adding any new syscalls which take a
filename without extending the interface like the *at functions?
I.e., add a file descriptor parameter, handle AT_FDCWD, etc.  The
additional effort is really minimal.  Even if, as in this case, the
function is propably not used in situations where the filename use is
racy there are still those people to consider who want to implement a
virtual per-thread current working directory.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 16:41 ` Ulrich Drepper
@ 2006-07-27 17:05   ` Pekka J Enberg
  2006-07-27 17:13     ` Ulrich Drepper
  2006-07-27 17:33     ` Alan Cox
  0 siblings, 2 replies; 51+ messages in thread
From: Pekka J Enberg @ 2006-07-27 17:05 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

On 7/27/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> > +asmlinkage int sys_revoke(const char __user *filename)

On Thu, 27 Jul 2006, Ulrich Drepper wrote:
> Could we just plainly avoid adding any new syscalls which take a
> filename without extending the interface like the *at functions?
> I.e., add a file descriptor parameter, handle AT_FDCWD, etc.  The
> additional effort is really minimal.  Even if, as in this case, the
> function is propably not used in situations where the filename use is
> racy there are still those people to consider who want to implement a
> virtual per-thread current working directory.

Sure. Though I wonder if sys_frevoke is enough for us and we can drop 
sys_revoke completely.

				Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 16:30         ` Alan Cox
@ 2006-07-27 17:07           ` Pekka J Enberg
  2006-07-27 18:27           ` Pekka Enberg
  1 sibling, 0 replies; 51+ messages in thread
From: Pekka J Enberg @ 2006-07-27 17:07 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

Ar Iau, 2006-07-27 am 19:01 +0300, ysgrifennodd Pekka J Enberg:
> > Yes revoke calls it too, but is that sufficient, or do we need ->revoke?
> > Ouch. You are right. I need to stick that invalidate_inode_pages2 
> > back in there. The do_fsync call takes care of writes only, obviously. 
 
On Thu, 27 Jul 2006, Alan Cox wrote:
> Actually that isn't true either - it takes care of *regular file*
> writes. Devices will need a revoke hook and thats really probably only
> right. If they don't have one just -EOPNOTSUPP, you can check it before
> you begin any other processing so its easy to check.

Ah, you're right. So I'll make a generic_file_revoke and f_ops->revoke 
that can be used by filesystems to do sync and inode page invalidation. 
That hook should be sufficient for device drivers too?

				Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:05   ` Pekka J Enberg
@ 2006-07-27 17:13     ` Ulrich Drepper
  2006-07-27 17:33       ` H. Peter Anvin
  2006-07-27 17:33     ` Alan Cox
  1 sibling, 1 reply; 51+ messages in thread
From: Ulrich Drepper @ 2006-07-27 17:13 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

On 7/27/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
> Sure. Though I wonder if sys_frevoke is enough for us and we can drop
> sys_revoke completely.

No, you want sys_revoke/sys_revokeat.  Simplest case: /dev/rst0 vs
/dev/nrst0.  You don't want the open rewind the drive.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* O_CAREFUL flag to disable open() side effects
  2006-07-27 17:33     ` Alan Cox
@ 2006-07-27 17:33       ` H. Peter Anvin
  2006-07-27 17:43         ` Russell King
                           ` (2 more replies)
  2006-08-05 21:05       ` [RFC/PATCH] revoke/frevoke system calls V2 Pavel Machek
  1 sibling, 3 replies; 51+ messages in thread
From: H. Peter Anvin @ 2006-07-27 17:33 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka J Enberg, Ulrich Drepper, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Alan Cox wrote:
> Ar Iau, 2006-07-27 am 20:05 +0300, ysgrifennodd Pekka J Enberg:
>> Sure. Though I wonder if sys_frevoke is enough for us and we can drop 
>> sys_revoke completely.
> 
> Alas not. Some Unix devices have side effects when you open() them. 
> 

Dumb thought: would it make sense to add an O_CAREFUL flag to open(), to 
disable side effects?  It seems that a number of devices have this issue 
and one have to jump through weird hoops to configure them.  Obviously, 
a file descriptor obtained with O_CAREFUL may not be fully functional, 
at the device driver's option.

For a conventional file, directory, or block device O_CAREFUL is a 
no-op.  For ttys it would typically behave similar to O_NONBLOCK 
followed immediately by a fcntl to clear the nonblock flag.

	-hpa

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:05   ` Pekka J Enberg
  2006-07-27 17:13     ` Ulrich Drepper
@ 2006-07-27 17:33     ` Alan Cox
  2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
  2006-08-05 21:05       ` [RFC/PATCH] revoke/frevoke system calls V2 Pavel Machek
  1 sibling, 2 replies; 51+ messages in thread
From: Alan Cox @ 2006-07-27 17:33 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: Ulrich Drepper, linux-kernel, linux-fsdevel, akpm, viro, tytso,
	tigran

Ar Iau, 2006-07-27 am 20:05 +0300, ysgrifennodd Pekka J Enberg:
> Sure. Though I wonder if sys_frevoke is enough for us and we can drop 
> sys_revoke completely.

Alas not. Some Unix devices have side effects when you open() them. 


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:13     ` Ulrich Drepper
@ 2006-07-27 17:33       ` H. Peter Anvin
  2006-07-27 17:44         ` Ulrich Drepper
  0 siblings, 1 reply; 51+ messages in thread
From: H. Peter Anvin @ 2006-07-27 17:33 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

Ulrich Drepper wrote:
> On 7/27/06, Pekka J Enberg <penberg@cs.helsinki.fi> wrote:
>> Sure. Though I wonder if sys_frevoke is enough for us and we can drop
>> sys_revoke completely.
> 
> No, you want sys_revoke/sys_revokeat.  Simplest case: /dev/rst0 vs
> /dev/nrst0.  You don't want the open rewind the drive.

No need to add sys_revoke.

	-hpa

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: O_CAREFUL flag to disable open() side effects
  2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
@ 2006-07-27 17:43         ` Russell King
  2006-07-27 17:50         ` Ulrich Drepper
  2006-07-27 18:05         ` Alan Cox
  2 siblings, 0 replies; 51+ messages in thread
From: Russell King @ 2006-07-27 17:43 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Alan Cox, Pekka J Enberg, Ulrich Drepper, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

On Thu, Jul 27, 2006 at 10:33:23AM -0700, H. Peter Anvin wrote:
> Dumb thought: would it make sense to add an O_CAREFUL flag to open(), to 
> disable side effects?  It seems that a number of devices have this issue 
> and one have to jump through weird hoops to configure them.  Obviously, 
> a file descriptor obtained with O_CAREFUL may not be fully functional, 
> at the device driver's option.
> 
> For a conventional file, directory, or block device O_CAREFUL is a 
> no-op.

What about door locking on block devices?  That might be an undesirable
side effect in some circumstances, so you might not want it to be a no-op
on blockdevs.

> For ttys it would typically behave similar to O_NONBLOCK 
> followed immediately by a fcntl to clear the nonblock flag.

What about, eg, raising DTR and RTS ?  You'd want to avoid raising
those if you're not actually going to be using the port.

-- 
Russell King
 Linux kernel    2.6 ARM Linux   - http://www.arm.linux.org.uk/
 maintainer of:  2.6 Serial core

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:33       ` H. Peter Anvin
@ 2006-07-27 17:44         ` Ulrich Drepper
  2006-07-27 18:00           ` H. Peter Anvin
  0 siblings, 1 reply; 51+ messages in thread
From: Ulrich Drepper @ 2006-07-27 17:44 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

On 7/27/06, H. Peter Anvin <hpa@zytor.com> wrote:
> No need to add sys_revoke.

Of course not.  I didn't imply that.  sys_revokeat is all that's
needed.  I just provided the other name to make clear they are
equivalent wrt to the file name property.  Not everyone knows the *at
functions.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: O_CAREFUL flag to disable open() side effects
  2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
  2006-07-27 17:43         ` Russell King
@ 2006-07-27 17:50         ` Ulrich Drepper
  2006-07-27 18:05         ` Alan Cox
  2 siblings, 0 replies; 51+ messages in thread
From: Ulrich Drepper @ 2006-07-27 17:50 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Alan Cox, Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro,
	tytso, tigran

On 7/27/06, H. Peter Anvin <hpa@zytor.com> wrote:
> Dumb thought: would it make sense to add an O_CAREFUL flag to open(), to
> disable side effects?

Not that I don't want to be constructive, but this is something you'll
likely never be able to specify with the needed level of accuray.
What is careful?  I can imagine all kinds ideas possible ways file
systems and device drivers might want to use this.

Adding a separate interface taking a file name isn't high enough a
price to open such a can of worms.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:44         ` Ulrich Drepper
@ 2006-07-27 18:00           ` H. Peter Anvin
  0 siblings, 0 replies; 51+ messages in thread
From: H. Peter Anvin @ 2006-07-27 18:00 UTC (permalink / raw)
  To: Ulrich Drepper
  Cc: Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

Ulrich Drepper wrote:
> On 7/27/06, H. Peter Anvin <hpa@zytor.com> wrote:
>> No need to add sys_revoke.
> 
> Of course not.  I didn't imply that.  sys_revokeat is all that's
> needed.  I just provided the other name to make clear they are
> equivalent wrt to the file name property.  Not everyone knows the *at
> functions.

Right.  We shouldn't add ANY more filename system calls at all, though. 
  They can be libc wrappers.

	-hpa

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: O_CAREFUL flag to disable open() side effects
  2006-07-27 18:05         ` Alan Cox
@ 2006-07-27 18:03           ` H. Peter Anvin
  0 siblings, 0 replies; 51+ messages in thread
From: H. Peter Anvin @ 2006-07-27 18:03 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka J Enberg, Ulrich Drepper, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Alan Cox wrote:
> Ar Iau, 2006-07-27 am 10:33 -0700, ysgrifennodd H. Peter Anvin:
>> For a conventional file, directory, or block device O_CAREFUL is a 
>> no-op.  For ttys it would typically behave similar to O_NONBLOCK 
>> followed immediately by a fcntl to clear the nonblock flag.
> 
> Linus long ago suggested O_NONE to go with RO/RW/WO. Its not that hard
> to do with the current file op stuff but you have to work out what the
> access permission semantics of it are and what it means for ioctl etc

O_NONE might be a good thing to do with that, but I think the "careful" 
semantics should be a separate flag (we shouldn't have different side 
effects depending on the individual mode.)

O_NONE would be a useful complement to O_CAREFUL though; for some 
devices O_CAREFUL with anything *other than* O_NONE might be an invalid 
operation.

The semantics would obviously be device dependent, but the basic idea 
should be that opening with O_CAREFUL should not disturb the global 
state of the device; it should only create a handle.

	-hpa


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: O_CAREFUL flag to disable open() side effects
  2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
  2006-07-27 17:43         ` Russell King
  2006-07-27 17:50         ` Ulrich Drepper
@ 2006-07-27 18:05         ` Alan Cox
  2006-07-27 18:03           ` H. Peter Anvin
  2 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-07-27 18:05 UTC (permalink / raw)
  To: H. Peter Anvin
  Cc: Pekka J Enberg, Ulrich Drepper, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Iau, 2006-07-27 am 10:33 -0700, ysgrifennodd H. Peter Anvin:
> For a conventional file, directory, or block device O_CAREFUL is a 
> no-op.  For ttys it would typically behave similar to O_NONBLOCK 
> followed immediately by a fcntl to clear the nonblock flag.

Linus long ago suggested O_NONE to go with RO/RW/WO. Its not that hard
to do with the current file op stuff but you have to work out what the
access permission semantics of it are and what it means for ioctl etc


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 14:25 [RFC/PATCH] revoke/frevoke system calls V2 Pekka J Enberg
  2006-07-27 15:07 ` Alan Cox
  2006-07-27 16:41 ` Ulrich Drepper
@ 2006-07-27 18:06 ` Petr Baudis
  2006-07-27 18:10   ` Pekka Enberg
  2006-07-27 18:34   ` Alan Cox
  2006-08-05 12:29 ` Pavel Machek
  3 siblings, 2 replies; 51+ messages in thread
From: Petr Baudis @ 2006-07-27 18:06 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

Dear diary, on Thu, Jul 27, 2006 at 04:25:07PM CEST, I got a letter
where Pekka J Enberg <penberg@cs.Helsinki.FI> said that...
> +	if (current->fsuid != inode->i_uid && !capable(CAP_FOWNER)) {
> +		err = -EPERM;
> +		goto out;
> +	}

Consider:

int main(int argc, char *argv[])
{
	int log = open(argv[1], O_WRONLY | O_APPEND);
	while (1) {
		struct { char *uname, *pwd; } *creds = get_credentials_from_user();
		int shadow = open("/etc/shadow", O_RDWR | O_APPEND);
		fprintf(log, "creds for %s lookup success: %d\n", uname, lookup_in_shadow(shadow, creds));
		do_whatever_strange(shadow);
		close(shadow);
	}
}

Make that setuid root or just create log file owned by you and make root
run it.  Should be innocent enough, right?

Well, except that you can revoke the log file before the shadow file is
opened, at which point open() probably reuses the fd and the program
conveniently logs to /etc/shadow.

You shouldn't let people do this to poor innocent processes running with
different uids.

-- 
				Petr "Pasky" Baudis
Stuff: http://pasky.or.cz/
Snow falling on Perl. White noise covering line noise.
Hides all the bugs too. -- J. Putnam

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 18:06 ` Petr Baudis
@ 2006-07-27 18:10   ` Pekka Enberg
  2006-07-27 19:30     ` Horst H. von Brand
  2006-07-27 18:34   ` Alan Cox
  1 sibling, 1 reply; 51+ messages in thread
From: Pekka Enberg @ 2006-07-27 18:10 UTC (permalink / raw)
  To: Petr Baudis; +Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

On Thu, 2006-07-27 at 20:06 +0200, Petr Baudis wrote:
> Make that setuid root or just create log file owned by you and make root
> run it.  Should be innocent enough, right?
> 
> Well, except that you can revoke the log file before the shadow file is
> opened, at which point open() probably reuses the fd and the program
> conveniently logs to /etc/shadow.

No, the fd is leaked on purpose to avoid recycling. See revoke_fds for
details.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 16:30         ` Alan Cox
  2006-07-27 17:07           ` Pekka J Enberg
@ 2006-07-27 18:27           ` Pekka Enberg
  1 sibling, 0 replies; 51+ messages in thread
From: Pekka Enberg @ 2006-07-27 18:27 UTC (permalink / raw)
  To: Alan Cox; +Cc: linux-kernel, linux-fsdevel, akpm, viro, tytso, tigran

On Thu, 2006-07-27 at 17:30 +0100, Alan Cox wrote:
> Actually that isn't true either - it takes care of *regular file*
> writes. Devices will need a revoke hook and thats really probably only
> right. If they don't have one just -EOPNOTSUPP, you can check it before
> you begin any other processing so its easy to check.

I have uploaded patch with ->revoke here:

http://www.kernel.org/pub/linux/kernel/people/penberg/patches/2.6.18-rc2/revoke-system-call-V3.patch

I'll post a new one to LKML after I am done other stuff probably next
week.

			Pekka


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 18:06 ` Petr Baudis
  2006-07-27 18:10   ` Pekka Enberg
@ 2006-07-27 18:34   ` Alan Cox
  1 sibling, 0 replies; 51+ messages in thread
From: Alan Cox @ 2006-07-27 18:34 UTC (permalink / raw)
  To: Petr Baudis
  Cc: Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro, tytso,
	tigran

Ar Iau, 2006-07-27 am 20:06 +0200, ysgrifennodd Petr Baudis:
> Well, except that you can revoke the log file before the shadow file is
> opened, at which point open() probably reuses the fd and the program
> conveniently logs to /etc/shadow.

revoke() kills *access* by that handle, it does not kill the fd. The BSD
designers thought of that one. Instead you end up with a handle that
reports -ENXIO (from memory) for I/O accesses, ioctl etc until you close
it when it goes away as expected.

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 18:10   ` Pekka Enberg
@ 2006-07-27 19:30     ` Horst H. von Brand
  2006-07-28  3:40       ` Pekka J Enberg
  0 siblings, 1 reply; 51+ messages in thread
From: Horst H. von Brand @ 2006-07-27 19:30 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Petr Baudis, linux-kernel, linux-fsdevel, akpm, viro, alan, tytso,
	tigran

Pekka Enberg <penberg@cs.helsinki.fi> wrote:
> On Thu, 2006-07-27 at 20:06 +0200, Petr Baudis wrote:
> > Make that setuid root or just create log file owned by you and make root
> > run it.  Should be innocent enough, right?
> > 
> > Well, except that you can revoke the log file before the shadow file is
> > opened, at which point open() probably reuses the fd and the program
> > conveniently logs to /etc/shadow.

> No, the fd is leaked on purpose to avoid recycling. See revoke_fds for
> details.

Doesn't that violate a POSIX guarantee that the lowest unused fd is
returned? If the leakage lasts "long enough", this gives an opportunity of
a nice DoS by using up fds...
-- 
Dr. Horst H. von Brand                   User #22616 counter.li.org
Departamento de Informatica                     Fono: +56 32 654431
Universidad Tecnica Federico Santa Maria              +56 32 654239
Casilla 110-V, Valparaiso, Chile                Fax:  +56 32 797513


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 19:30     ` Horst H. von Brand
@ 2006-07-28  3:40       ` Pekka J Enberg
  0 siblings, 0 replies; 51+ messages in thread
From: Pekka J Enberg @ 2006-07-28  3:40 UTC (permalink / raw)
  To: Horst H. von Brand
  Cc: Petr Baudis, linux-kernel, linux-fsdevel, akpm, viro, alan, tytso,
	tigran

On Thu, 27 Jul 2006, Horst H. von Brand wrote:
> Doesn't that violate a POSIX guarantee that the lowest unused fd is
> returned? If the leakage lasts "long enough", this gives an opportunity of
> a nice DoS by using up fds...

The fd is not unused, its revoked. To clarify, the potential DoS is 
inherit for revoke, but since only root or the file owner can revoke a 
file, I don't see that as a big problem in practice. The caller is 
expected to ensure that no one can reopen the file anyway.

				Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 14:25 [RFC/PATCH] revoke/frevoke system calls V2 Pekka J Enberg
                   ` (2 preceding siblings ...)
  2006-07-27 18:06 ` Petr Baudis
@ 2006-08-05 12:29 ` Pavel Machek
  2006-08-07  5:42   ` Pekka J Enberg
  2006-08-07  8:17   ` Edgar Toernig
  3 siblings, 2 replies; 51+ messages in thread
From: Pavel Machek @ 2006-08-05 12:29 UTC (permalink / raw)
  To: Pekka J Enberg
  Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

Hi!

> This patch implements the revoke(2) and frevoke(2) system calls for
> all types of files. The operation is done in passes: first we replace


Do we need revoke()? open()+frevoke() should be fast enough, no?
-- 
Thanks for all the (sleeping) penguins.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-07-27 17:33     ` Alan Cox
  2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
@ 2006-08-05 21:05       ` Pavel Machek
  1 sibling, 0 replies; 51+ messages in thread
From: Pavel Machek @ 2006-08-05 21:05 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka J Enberg, Ulrich Drepper, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Hi!

> > Sure. Though I wonder if sys_frevoke is enough for us and we can drop 
> > sys_revoke completely.
> 
> Alas not. Some Unix devices have side effects when you open() them. 

We probably want some other solution for those, anyway...? Like
open(..., O_NONE)?
							Pavel
-- 
Thanks for all the (sleeping) penguins.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-05 12:29 ` Pavel Machek
@ 2006-08-07  5:42   ` Pekka J Enberg
  2006-08-07  8:17   ` Edgar Toernig
  1 sibling, 0 replies; 51+ messages in thread
From: Pekka J Enberg @ 2006-08-07  5:42 UTC (permalink / raw)
  To: Pavel Machek; +Cc: linux-kernel, linux-fsdevel, akpm, viro, alan, tytso, tigran

At some point in time, I wrote:
> > This patch implements the revoke(2) and frevoke(2) system calls for
> > all types of files. The operation is done in passes: first we replace

On Sat, 5 Aug 2006, Pavel Machek wrote:
> Do we need revoke()? open()+frevoke() should be fast enough, no?

Not a speed issue, open can have side effects which we might want to 
avoid. See comments from Alan and Ulrich in this thread.

				Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-05 12:29 ` Pavel Machek
  2006-08-07  5:42   ` Pekka J Enberg
@ 2006-08-07  8:17   ` Edgar Toernig
  2006-08-07  9:51     ` Pekka Enberg
  1 sibling, 1 reply; 51+ messages in thread
From: Edgar Toernig @ 2006-08-07  8:17 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Pekka J Enberg, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

Pavel Machek wrote:
>
> > This patch implements the revoke(2) and frevoke(2) system calls for
> > all types of files. The operation is done in passes: first we replace
> 
> 
> Do we need revoke()? open()+frevoke() should be fast enough, no?

Why do we need [f]revoke at all?  As it doesn't implement the
BSD semantic I can't see why it's better than fuser -k.

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-07  8:17   ` Edgar Toernig
@ 2006-08-07  9:51     ` Pekka Enberg
  2006-08-07 20:41       ` Edgar Toernig
  0 siblings, 1 reply; 51+ messages in thread
From: Pekka Enberg @ 2006-08-07  9:51 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Pavel Machek, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

On 8/7/06, Edgar Toernig <froese@gmx.de> wrote:
> Why do we need [f]revoke at all?  As it doesn't implement the
> BSD semantic I can't see why it's better than fuser -k.

Which part of the BSD semantics is that?

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-07  9:51     ` Pekka Enberg
@ 2006-08-07 20:41       ` Edgar Toernig
  2006-08-07 22:24         ` Chase Venters
  2006-08-08 12:29         ` Alan Cox
  0 siblings, 2 replies; 51+ messages in thread
From: Edgar Toernig @ 2006-08-07 20:41 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Pavel Machek, linux-kernel, linux-fsdevel, akpm, viro, alan,
	tytso, tigran

Pekka Enberg wrote:
>
> On 8/7/06, Edgar Toernig <froese@gmx.de> wrote:
> > Why do we need [f]revoke at all?  As it doesn't implement the
> > BSD semantic I can't see why it's better than fuser -k.
> 
> Which part of the BSD semantics is that?

That which talks about character devices, in particular ttys.

NetBSD revoke(2):
|
| ... a read() from a character device file which has been revoked
| returns a count of zero (end of file), and a close() call will
| succeed.
|...
| revoke is normally used to prepare a terminal device for a new
| login session, preventing any access by a previous user of the
| terminal.

Irix revoke(2) even mentions:
|
| ERRORS:
|  ...
|  [EINVAL] The named file is not a character-special file.

It seems, revoke was intended to disable access to tty devices
from old processes in a controlled way.  Sounds sane.

Your implementation is much cruder - it simply takes the fd
away from the app; any future use gives EBADF.  As a bonus,
it works for regular files and even goes as far as destroying
all mappings of the file from all processes (even root processes).
IMVHO this is a disaster from a security and reliability point
of view.

So, the behaviour regarding ttys is completely different to
other implementations and for other types of fds the Linux
semantic seems unique (the man-pages of the other systems
are pretty silent about that).

A serious question: What do you need this feature of revoking
regular files (or block devices) for?  Maybe my imagination
is lacking, but I can't find a use where fuser(1) (or similar
tools) wouldn't be as good or even better than revoke(2).

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-07 20:41       ` Edgar Toernig
@ 2006-08-07 22:24         ` Chase Venters
  2006-08-08 12:15           ` Alan Cox
  2006-08-08 12:29         ` Alan Cox
  1 sibling, 1 reply; 51+ messages in thread
From: Chase Venters @ 2006-08-07 22:24 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, alan, tytso, tigran

On Mon, 7 Aug 2006, Edgar Toernig wrote:

>
> Your implementation is much cruder - it simply takes the fd
> away from the app; any future use gives EBADF.  As a bonus,
> it works for regular files and even goes as far as destroying
> all mappings of the file from all processes (even root processes).
> IMVHO this is a disaster from a security and reliability point
> of view.
>

I can see the value in these system calls, but I agree that the 
implementation is crude. "EBADF" is not something that applications are 
taught to expect. Someone correct me if I'm wrong, but I can think of no 
situation under which a file descriptor currently gets yanked out from 
under your feet -- you should always have to formally abandon it with 
close().

This kind of thing only looks proper if it leaves the file descriptor in 
place and just returns errors / EOF when you attempt to access it.

Thanks,
Chase

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-07 22:24         ` Chase Venters
@ 2006-08-08 12:15           ` Alan Cox
  2006-08-09  8:41             ` Edgar Toernig
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-08 12:15 UTC (permalink / raw)
  To: Chase Venters
  Cc: Edgar Toernig, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Ar Llu, 2006-08-07 am 17:24 -0500, ysgrifennodd Chase Venters:
> implementation is crude. "EBADF" is not something that applications are 
> taught to expect. Someone correct me if I'm wrong, but I can think of no 
> situation under which a file descriptor currently gets yanked out from 
> under your feet -- you should always have to formally abandon it with 
> close().

The file descriptor is not pulled from under you, the access to it is.
This is exactly what occurs today when a tty is hung up. That could be
almost any fd because paths could be symlinks to a pty/tty pair...

In the tty case you get -ENXIO

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-07 20:41       ` Edgar Toernig
  2006-08-07 22:24         ` Chase Venters
@ 2006-08-08 12:29         ` Alan Cox
  2006-08-08 12:31           ` Pekka Enberg
                             ` (2 more replies)
  1 sibling, 3 replies; 51+ messages in thread
From: Alan Cox @ 2006-08-08 12:29 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Llu, 2006-08-07 am 22:41 +0200, ysgrifennodd Edgar Toernig:
> It seems, revoke was intended to disable access to tty devices
> from old processes in a controlled way.  Sounds sane.

Thats the root from which it comes but that alone is insufficient which
is why our vhangup is not enough.

> Your implementation is much cruder - it simply takes the fd
> away from the app; any future use gives EBADF.  As a bonus,

It needs to give -ENXIO/0 as per BSD that much is clear.

> it works for regular files and even goes as far as destroying
> all mappings of the file from all processes (even root processes).
> IMVHO this is a disaster from a security and reliability point
> of view.

Actually its no different than if it didn't. The two are identical
behaviours.

To use revoke() I must own the file
If I own the file I can make it a symlink to a pty/tty pair
I can revoke a pty/tty pair

> A serious question: What do you need this feature of revoking
> regular files (or block devices) for?  Maybe my imagination
> is lacking, but I can't find a use where fuser(1) (or similar
> tools) wouldn't be as good or even better than revoke(2).

On a typical non-SELinux system with a typical desktop configuration
(SELinux can effectively replace revoke) you need revoke on block
devices in order to guarantee security and on other char devices for
privacy. I'll provide some demonstrations after we have revoke in some
form in the kernel and the problems in question fixed.

There are specific cases where being able to revoke access to one of
your files is useful as well, particularly if you are moving it from
open permissions to private permissions. That one is to be honest much
less interesting and it is easy enough to make our revoke()
implementation return -EINVAL.

The driver only case actually makes it a lot easier because you only
need to set some kind of f_revoked flag on files owned by that device,
truncate the virtual memory mappings and then call the driver method.
The driver would then honour ->f_revoked in its own ioctl/read/write
methods or in the helpers.

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 12:29         ` Alan Cox
@ 2006-08-08 12:31           ` Pekka Enberg
  2006-08-08 12:57           ` Pavel Machek
  2006-08-09  8:41           ` Edgar Toernig
  2 siblings, 0 replies; 51+ messages in thread
From: Pekka Enberg @ 2006-08-08 12:31 UTC (permalink / raw)
  To: Alan Cox
  Cc: Edgar Toernig, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Llu, 2006-08-07 am 22:41 +0200, ysgrifennodd Edgar Toernig:
> > Your implementation is much cruder - it simply takes the fd
> > away from the app; any future use gives EBADF.  As a bonus,

On 8/8/06, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> It needs to give -ENXIO/0 as per BSD that much is clear.

I assume you mean devices only? EBADF makes sense for regular files,
except for close(2), maybe, for which zero is probably more
appropriate.

                                          Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 12:29         ` Alan Cox
  2006-08-08 12:31           ` Pekka Enberg
@ 2006-08-08 12:57           ` Pavel Machek
  2006-08-08 14:14             ` Alan Cox
  2006-08-09  8:41           ` Edgar Toernig
  2 siblings, 1 reply; 51+ messages in thread
From: Pavel Machek @ 2006-08-08 12:57 UTC (permalink / raw)
  To: Alan Cox
  Cc: Edgar Toernig, Pekka Enberg, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Hi!

> > it works for regular files and even goes as far as destroying
> > all mappings of the file from all processes (even root processes).
> > IMVHO this is a disaster from a security and reliability point
> > of view.
> 
> Actually its no different than if it didn't. The two are identical
> behaviours.
> 
> To use revoke() I must own the file
> If I own the file I can make it a symlink to a pty/tty pair
> I can revoke a pty/tty pair

How can you symlink opened file?

-- 
Thanks for all the (sleeping) penguins.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 14:14             ` Alan Cox
@ 2006-08-08 13:57               ` Pavel Machek
  0 siblings, 0 replies; 51+ messages in thread
From: Pavel Machek @ 2006-08-08 13:57 UTC (permalink / raw)
  To: Alan Cox
  Cc: Edgar Toernig, Pekka Enberg, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Hi!

> > > To use revoke() I must own the file
> > > If I own the file I can make it a symlink to a pty/tty pair
> > > I can revoke a pty/tty pair
> > 
> > How can you symlink opened file?
> 
> I make a symlink before running the app which opens it. Or if the app
> doesn't open it I pass the file handle of a pty/tty pair to it.

Okay, I guess marginal app could do open, then fstat to make sure it
is not pty/tty, then proceed assuming it can't go away. But I see that
chances of _that_ happening are slim.
								Pavel
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 12:57           ` Pavel Machek
@ 2006-08-08 14:14             ` Alan Cox
  2006-08-08 13:57               ` Pavel Machek
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-08 14:14 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Edgar Toernig, Pekka Enberg, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Maw, 2006-08-08 am 12:57 +0000, ysgrifennodd Pavel Machek:
> > To use revoke() I must own the file
> > If I own the file I can make it a symlink to a pty/tty pair
> > I can revoke a pty/tty pair
> 
> How can you symlink opened file?

I make a symlink before running the app which opens it. Or if the app
doesn't open it I pass the file handle of a pty/tty pair to it.

Alan


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 12:29         ` Alan Cox
  2006-08-08 12:31           ` Pekka Enberg
  2006-08-08 12:57           ` Pavel Machek
@ 2006-08-09  8:41           ` Edgar Toernig
  2006-08-09 10:42             ` Alan Cox
  2 siblings, 1 reply; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09  8:41 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Alan Cox wrote:
>
> Ar Llu, 2006-08-07 am 22:41 +0200, ysgrifennodd Edgar Toernig:
> >
> > Your implementation is much cruder - it simply takes the fd
> > away from the app; any future use gives EBADF.  As a bonus,
> 
> It needs to give -ENXIO/0 as per BSD that much is clear.

Ah, OK.  And not to forget select/poll.  (What about SIGHUP?)
I'm not sure though, whether it's really necessary to allow the
owner of a file to revoke fds - I would feel better if only root
(or someone with the right caps) could revoke fds/mappings.

> To use revoke() I must own the file
> If I own the file I can make it a symlink to a pty/tty pair
> I can revoke a pty/tty pair

With the EIO/EOF behaviour that's not a problem - apps that deal
with ttys have to expect that condition.


> > A serious question: What do you need this feature of revoking
> > regular files (or block devices) for?  Maybe my imagination
> > is lacking, but I can't find a use where fuser(1) (or similar
> > tools) wouldn't be as good or even better than revoke(2).
> 
> On a typical non-SELinux system with a typical desktop configuration
> (SELinux can effectively replace revoke) you need revoke on block
> devices in order to guarantee security

Hmm... which apps have an open fd on block devices?  Usually a
filesystem is mounted on the device and then there are no fds
to the block-dev involved.  Or do you expect the "fuser -m"
behaviour from revoke?  Afaics, that's not the case at the moment.
Which users have perms to access a block-dev anyway?

> There are specific cases where being able to revoke access to one of
> your files is useful as well, particularly if you are moving it from
> open permissions to private permissions. That one is to be honest much
> less interesting and it is easy enough to make our revoke()
> implementation return -EINVAL.

Hmm... then use fuser and kill the process instead of silently taking
away fds and mappings.


My summary: revoke on chars devs with EIO/EOF behaviour is ok.
            revoke on blocks devs is questionable
            revoke on regular files is wrong.

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-08 12:15           ` Alan Cox
@ 2006-08-09  8:41             ` Edgar Toernig
  2006-08-09 10:39               ` Alan Cox
  0 siblings, 1 reply; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09  8:41 UTC (permalink / raw)
  To: Alan Cox
  Cc: Chase Venters, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Alan Cox wrote:
>
> Ar Llu, 2006-08-07 am 17:24 -0500, ysgrifennodd Chase Venters:
> > implementation is crude. "EBADF" is not something that applications are 
> > taught to expect. Someone correct me if I'm wrong, but I can think of no 
> > situation under which a file descriptor currently gets yanked out from 
> > under your feet -- you should always have to formally abandon it with 
> > close().
> 
> The file descriptor is not pulled from under you, the access to it is.
> This is exactly what occurs today when a tty is hung up.

If I read the code correctly, the behaviour for hung up ttys is completely
different: read returns EOF, write returns EIO, select/poll/epoll return
ready, close works.  As rather boring but totally sane behaviour for an fd.

But after revoke you get EBADF for any operation, even select or close.
The fd becomes nearly indistinguishable from a really closed fd (the only
difference is that the fd-number won't be reused (potentional DoS)).
And IMHO that's insane that a regular user may close fds in someone else's
processes (or munmap some of its memory).  I already see people trying
to exploit bugs in system services:

	for (;;) revoke("index.html");
	for (;;) revoke("some_print_job");
	for (;;) revoke("some_mail");

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09  8:41             ` Edgar Toernig
@ 2006-08-09 10:39               ` Alan Cox
  2006-08-09 18:00                 ` Edgar Toernig
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-09 10:39 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Chase Venters, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Ar Mer, 2006-08-09 am 10:41 +0200, ysgrifennodd Edgar Toernig:
> If I read the code correctly, the behaviour for hung up ttys is completely
> different: read returns EOF, write returns EIO, select/poll/epoll return
> ready, close works.  As rather boring but totally sane behaviour for an fd.
> 
> But after revoke you get EBADF for any operation, even select or close.

Thats a detail of the proposed implementation that isn't hard to fix.

> And IMHO that's insane that a regular user may close fds in someone else's
> processes (or munmap some of its memory).  I already see people trying
> to exploit bugs in system services:

I can do this already today. In fact the index.html one can be used to
crash certain products now depending on their configuration. Just do

	while { true } do
		cp some.html index.html
		> index.html
	done

with a shell that truncates on > and you'll be able to bus error them if
they mmap and are not well written.

These are not actually changes in behaviour. At any point I can shrink a
file I own and you get read -> 0, mmap access -> bus error. Write
behaviour is new but thats no different to filling the disk up or other
real write errors.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09  8:41           ` Edgar Toernig
@ 2006-08-09 10:42             ` Alan Cox
  2006-08-09 18:00               ` Edgar Toernig
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-09 10:42 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Mer, 2006-08-09 am 10:41 +0200, ysgrifennodd Edgar Toernig:
> > If I own the file I can make it a symlink to a pty/tty pair
> > I can revoke a pty/tty pair
> 
> With the EIO/EOF behaviour that's not a problem - apps that deal
> with ttys have to expect that condition.

Think about it a moment - I can symlink any file to a tty/pty pair so
any file I own you open might be a tty.

> Hmm... which apps have an open fd on block devices?  Usually a

cdrecord, cd audio players, eject, ....



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 10:39               ` Alan Cox
@ 2006-08-09 18:00                 ` Edgar Toernig
  2006-08-09 18:36                   ` Alan Cox
  2006-08-11  7:52                   ` Helge Hafting
  0 siblings, 2 replies; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09 18:00 UTC (permalink / raw)
  To: Alan Cox
  Cc: Chase Venters, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Alan Cox wrote:
>
> Ar Mer, 2006-08-09 am 10:41 +0200, ysgrifennodd Edgar Toernig:
> > 
> > But after revoke you get EBADF for any operation, even select or close.
> 
> Thats a detail of the proposed implementation that isn't hard to fix.

Fine.

> > And IMHO that's insane that a regular user may close fds in someone else's
> > processes (or munmap some of its memory).  I already see people trying
> > to exploit bugs in system services:
> 
> I can do this already today. In fact the index.html one can be used to
> crash certain products now depending on their configuration. Just do
> 
> 	while { true } do
> 		cp some.html index.html
> 		> index.html
> 	done
> 
> with a shell that truncates on > and you'll be able to bus error them if
> they mmap and are not well written.

I wasn't aware of that (and I would definitely prefer a different behaviour).

But anyway, correct me if I'm wrong, revoke (V2) not simply removes the
pages from the mmaped area as truncating does (the vma stays);  revoke
seems to completely remove the vma which is clearly a security bug.
Future mappings may silently get mapped into the area of the revoked
file without the app noticing it.  It may then hand out data of the new
file still thinking it's sending the old one.

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 10:42             ` Alan Cox
@ 2006-08-09 18:00               ` Edgar Toernig
  2006-08-09 18:35                 ` Alan Cox
  0 siblings, 1 reply; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09 18:00 UTC (permalink / raw)
  To: Alan Cox
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Alan Cox wrote:
>
> Ar Mer, 2006-08-09 am 10:41 +0200, ysgrifennodd Edgar Toernig:
> > > If I own the file I can make it a symlink to a pty/tty pair
> > > I can revoke a pty/tty pair
> > 
> > With the EIO/EOF behaviour that's not a problem - apps that deal
> > with ttys have to expect that condition.
> 
> Think about it a moment - I can symlink any file to a tty/pty pair so
> any file I own you open might be a tty.

Yes, OK.  The EIO/EOF behaviour is fine.  Even for regular files it's
not something extraordinary.

> > Hmm... which apps have an open fd on block devices?  Usually a
> 
> cdrecord, cd audio players, eject, ....

And killing them is not OK?  "fuser -km /dev/cdrom" already covers both
cases, mounted somewhere and opened for special access.


Sorry if I sound a little bit anal.  IMO, a generic revoke is a pretty
sharp sword which is given to ordinary users and I have a very uneasy
feeling.  They can dig in the innards of other people's processes - a
clean headshot by root is something different ...

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 18:00               ` Edgar Toernig
@ 2006-08-09 18:35                 ` Alan Cox
  2006-08-09 19:14                   ` Pekka Enberg
  0 siblings, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-09 18:35 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Pekka Enberg, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

Ar Mer, 2006-08-09 am 20:00 +0200, ysgrifennodd Edgar Toernig:
> And killing them is not OK?  "fuser -km /dev/cdrom" already covers both
> cases, mounted somewhere and opened for special access.

fuser is quite easy to race, it doesn't handle all sorts of corner cases
like namespaces either. Its a crude blunt instrument that sometimes
works and is very slow.

> Sorry if I sound a little bit anal.  IMO, a generic revoke is a pretty
> sharp sword which is given to ordinary users and I have a very uneasy
> feeling.  They can dig in the innards of other people's processes - a
> clean headshot by root is something different ...

I can see your concern about arbitary files, but I'm not sure it holds
simply because the tricks already exist via other methods.


^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 18:00                 ` Edgar Toernig
@ 2006-08-09 18:36                   ` Alan Cox
  2006-08-09 19:13                     ` Pekka Enberg
  2006-08-11  7:52                   ` Helge Hafting
  1 sibling, 1 reply; 51+ messages in thread
From: Alan Cox @ 2006-08-09 18:36 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Chase Venters, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Ar Mer, 2006-08-09 am 20:00 +0200, ysgrifennodd Edgar Toernig:
> But anyway, correct me if I'm wrong, revoke (V2) not simply removes the
> pages from the mmaped area as truncating does (the vma stays);  revoke
> seems to completely remove the vma which is clearly a security bug.
> Future mappings may silently get mapped into the area of the revoked
> file without the app noticing it.  It may then hand out data of the new
> file still thinking it's sending the old one.

I agree with that point 100%.



^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 18:36                   ` Alan Cox
@ 2006-08-09 19:13                     ` Pekka Enberg
  2006-08-09 20:08                       ` Edgar Toernig
  2006-08-09 21:29                       ` Edgar Toernig
  0 siblings, 2 replies; 51+ messages in thread
From: Pekka Enberg @ 2006-08-09 19:13 UTC (permalink / raw)
  To: Alan Cox
  Cc: Edgar Toernig, Chase Venters, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Ar Mer, 2006-08-09 am 20:00 +0200, ysgrifennodd Edgar Toernig:
> > But anyway, correct me if I'm wrong, revoke (V2) not simply removes the
> > pages from the mmaped area as truncating does (the vma stays);  revoke
> > seems to completely remove the vma which is clearly a security bug.
> > Future mappings may silently get mapped into the area of the revoked
> > file without the app noticing it.  It may then hand out data of the new
> > file still thinking it's sending the old one.

On 8/9/06, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> I agree with that point 100%.

Agreed also. I already had a version that simply replaced the ->nopage
method of vma_ops but took what forced unmount patch had to get proper
->close dealings. But I completely agree that sane revocation should
allow close(2) and munmap(2) on the revoked fd and shared mapping.
I'll put them on my todo and in the meanwhile, you can find the latest
patches here: http://www.kernel.org/pub/linux/kernel/people/penberg/patches/revoke/

Thanks for taking the time to review the patch!

                                     Pekka

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 18:35                 ` Alan Cox
@ 2006-08-09 19:14                   ` Pekka Enberg
  0 siblings, 0 replies; 51+ messages in thread
From: Pekka Enberg @ 2006-08-09 19:14 UTC (permalink / raw)
  To: Alan Cox
  Cc: Edgar Toernig, Pavel Machek, linux-kernel, linux-fsdevel, akpm,
	viro, tytso, tigran

On 8/9/06, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> I can see your concern about arbitary files, but I'm not sure it holds
> simply because the tricks already exist via other methods.

Agreed. We should support close(2) and munmap(2), though.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 19:13                     ` Pekka Enberg
@ 2006-08-09 20:08                       ` Edgar Toernig
  2006-08-09 21:29                       ` Edgar Toernig
  1 sibling, 0 replies; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09 20:08 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Chase Venters, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Pekka Enberg wrote:
>
> I'll put them on my todo and in the meanwhile, you can find the latest
> patches here:
>   http://www.kernel.org/pub/linux/kernel/people/penberg/patches/revoke/
> 
> Thanks for taking the time to review the patch!

+  retry:
+	if (signal_pending(current)) {
+		err = -ERESTARTSYS;
+		goto out;
+	}
+
+	to_close = alloc_revoke_table(inode, to_exclude, &max_fds);
+	if (!to_close) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	read_lock(&tasklist_lock);
+
+	/*
+	 * If someone forked while we were allocating memory, try again.
+	 */
+	if (inode_fds(inode, to_exclude) > max_fds) {
+		read_unlock(&tasklist_lock);
+		goto retry;
+	}

It seems, the retry is leaking the to_close table.

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 19:13                     ` Pekka Enberg
  2006-08-09 20:08                       ` Edgar Toernig
@ 2006-08-09 21:29                       ` Edgar Toernig
  1 sibling, 0 replies; 51+ messages in thread
From: Edgar Toernig @ 2006-08-09 21:29 UTC (permalink / raw)
  To: Pekka Enberg
  Cc: Alan Cox, Chase Venters, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Pekka Enberg wrote:
>
> I'll put them on my todo and in the meanwhile, you can find the latest
> patches here:
>   http://www.kernel.org/pub/linux/kernel/people/penberg/patches/revoke/
> 
> Thanks for taking the time to review the patch!

+		err = close_files(this);
+
+		put_task_struct(this->owner);
+		if (err)
+			break;
+	}
+	if (err)
+		restore_files(&to_cleanup[i], nr_fds-i);

I think, the error path is wrong as it tries to restore "this"
which means the now invalid filp (close always closes, even in
case of errors) is put back into the fd-table; and, the task
struct is put twice.  I think, you should ignore errors on close.
(But I guess, this part of revoke gets rewritten anyway to match
BSD behaviour.)  I wonder, if revoke should really abort when
there's an error from one fd or better continue and try its best.

restore_files:
+			spin_lock(&files->file_lock);
+			fdt = files_fdtable(files);
+			rcu_assign_pointer(fdt->fd[this->fd], this->file);
+			FD_SET(this->fd, fdt->close_on_exec);
+			spin_unlock(&files->file_lock);
+			put_files_struct(files);
+		}
+
+		put_task_struct(this->owner);

This sets close_on_exec unconditionally, even if it wasn't set
before.  Hm..., if a cloned thread is able to exec, it seems a
little bit dangerous to restore the fd-table with filps that
were valid some time ago - the fd-table may have changed in the
meantime...  But maybe I simply missed something...

Ciao, ET.

^ permalink raw reply	[flat|nested] 51+ messages in thread

* Re: [RFC/PATCH] revoke/frevoke system calls V2
  2006-08-09 18:00                 ` Edgar Toernig
  2006-08-09 18:36                   ` Alan Cox
@ 2006-08-11  7:52                   ` Helge Hafting
  1 sibling, 0 replies; 51+ messages in thread
From: Helge Hafting @ 2006-08-11  7:52 UTC (permalink / raw)
  To: Edgar Toernig
  Cc: Alan Cox, Chase Venters, Pekka Enberg, Pavel Machek, linux-kernel,
	linux-fsdevel, akpm, viro, tytso, tigran

Edgar Toernig wrote:
[...]
> I wasn't aware of that (and I would definitely prefer a different behaviour).
>
> But anyway, correct me if I'm wrong, revoke (V2) not simply removes the
> pages from the mmaped area as truncating does (the vma stays);  revoke
> seems to completely remove the vma which is clearly a security bug.
> Future mappings may silently get mapped into the area of the revoked
> file without the app noticing it.  It may then hand out data of the new
> file still thinking it's sending the old one.
>   
One could remap to /dev/null - the file would then be free to be
umounted, but the app could get confused. Or map inaccessible
pages, so the app segfault on the next access.

Helge Hafting

^ permalink raw reply	[flat|nested] 51+ messages in thread

end of thread, other threads:[~2006-08-11  7:55 UTC | newest]

Thread overview: 51+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-07-27 14:25 [RFC/PATCH] revoke/frevoke system calls V2 Pekka J Enberg
2006-07-27 15:07 ` Alan Cox
2006-07-27 15:33   ` Pekka Enberg
2006-07-27 16:09     ` Alan Cox
2006-07-27 16:01       ` Pekka J Enberg
2006-07-27 16:30         ` Alan Cox
2006-07-27 17:07           ` Pekka J Enberg
2006-07-27 18:27           ` Pekka Enberg
2006-07-27 16:41 ` Ulrich Drepper
2006-07-27 17:05   ` Pekka J Enberg
2006-07-27 17:13     ` Ulrich Drepper
2006-07-27 17:33       ` H. Peter Anvin
2006-07-27 17:44         ` Ulrich Drepper
2006-07-27 18:00           ` H. Peter Anvin
2006-07-27 17:33     ` Alan Cox
2006-07-27 17:33       ` O_CAREFUL flag to disable open() side effects H. Peter Anvin
2006-07-27 17:43         ` Russell King
2006-07-27 17:50         ` Ulrich Drepper
2006-07-27 18:05         ` Alan Cox
2006-07-27 18:03           ` H. Peter Anvin
2006-08-05 21:05       ` [RFC/PATCH] revoke/frevoke system calls V2 Pavel Machek
2006-07-27 18:06 ` Petr Baudis
2006-07-27 18:10   ` Pekka Enberg
2006-07-27 19:30     ` Horst H. von Brand
2006-07-28  3:40       ` Pekka J Enberg
2006-07-27 18:34   ` Alan Cox
2006-08-05 12:29 ` Pavel Machek
2006-08-07  5:42   ` Pekka J Enberg
2006-08-07  8:17   ` Edgar Toernig
2006-08-07  9:51     ` Pekka Enberg
2006-08-07 20:41       ` Edgar Toernig
2006-08-07 22:24         ` Chase Venters
2006-08-08 12:15           ` Alan Cox
2006-08-09  8:41             ` Edgar Toernig
2006-08-09 10:39               ` Alan Cox
2006-08-09 18:00                 ` Edgar Toernig
2006-08-09 18:36                   ` Alan Cox
2006-08-09 19:13                     ` Pekka Enberg
2006-08-09 20:08                       ` Edgar Toernig
2006-08-09 21:29                       ` Edgar Toernig
2006-08-11  7:52                   ` Helge Hafting
2006-08-08 12:29         ` Alan Cox
2006-08-08 12:31           ` Pekka Enberg
2006-08-08 12:57           ` Pavel Machek
2006-08-08 14:14             ` Alan Cox
2006-08-08 13:57               ` Pavel Machek
2006-08-09  8:41           ` Edgar Toernig
2006-08-09 10:42             ` Alan Cox
2006-08-09 18:00               ` Edgar Toernig
2006-08-09 18:35                 ` Alan Cox
2006-08-09 19:14                   ` Pekka Enberg

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).