public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [RFC][PATCH] ->signal->tty locking
@ 2006-10-16  9:53 Peter Zijlstra
  2006-10-16 13:39 ` Prarit Bhargava
  2006-10-17  8:10 ` Oleg Nesterov
  0 siblings, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2006-10-16  9:53 UTC (permalink / raw)
  To: linux-kernel; +Cc: Prarit Bhargava, Oleg Nesterov, Alan Cox

Oleg wrote:
"Historically ->signal/->sighand (both ptrs and their contents) were globally
protected by tasklist_lock. 'current' can use these pointers lockless, they
can't be changed under him.

Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
Unless you are current, you can't lock ->siglock directly (without holding
tasklist_lock), you should use lock_task_sighand()."

Then, to be consistent with the rest of the kernel, ->signal->tty
locking should look like so:

  mutex_lock(&tty_mutex)
    read_lock(&tasklist_lock)
      lock_task_sighand(p, &flags)

However, lock_task_sighand(), is a conditional lock, p might not have a
->sighand, in which case it returns NULL. What would that mean for
->signal, can I then still modify it?

struct sighand_struct *sighand = lock_task_sighand(p, &flags);
if (sighand) {
	/* modify/use ->signal->tty */
	unlock_task_sighand(p, &flags);
} else
	/* now what !? */

The same problem appears in fs/proc/array.c:do_task_stat(), there the
locking doesn't look quite right either.

Before realizing this I came this far:
---
 drivers/char/tty_io.c |   86 ++++++++++++++++++++++++++++++++++++++------------
 kernel/auditsc.c      |    1 
 kernel/exit.c         |    3 +
 kernel/sys.c          |    4 ++
 4 files changed, 75 insertions(+), 19 deletions(-)

Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -63,6 +63,12 @@
  *
  * Move do_SAK() into process context.  Less stack use in devfs functions.
  * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ *
+ * A word on (struct task)::signal->tty locking
+ *
+ *   mutex_lock(&tty_mutex)
+ *     read_lock(&tasklist_lock)
+ *       lock_task_sighand()
  */
 
 #include <linux/types.h>
@@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data)
 	struct task_struct *p;
 	struct tty_ldisc *ld;
 	int    closecount = 0, n;
+	unsigned long flags;
 
 	if (!tty)
 		return;
@@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
 	  This should get done automatically when the port closes and
 	  tty_release is called */
 	
+	mutex_lock(&tty_mutex);
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
 		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			lock_task_sighand(p, &flags);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
+			unlock_task_sighand(p, &flags);
 			if (!p->signal->leader)
 				continue;
 			group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
 			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+			lock_task_sighand(p, &flags);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
+			unlock_task_sighand(p, &flags);
 		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
+	mutex_unlock(&tty_mutex);
 
 	tty->flags = 0;
 	tty->session = 0;
@@ -1479,10 +1492,12 @@ void disassociate_ctty(int on_exit)
 	struct tty_struct *tty;
 	struct task_struct *p;
 	int tty_pgrp = -1;
+	unsigned long flags;
 
 	lock_kernel();
 
 	mutex_lock(&tty_mutex);
+	/* XXX is this save wrt siglock!? */
 	tty = current->signal->tty;
 	if (tty) {
 		tty_pgrp = tty->pgrp;
@@ -1490,9 +1505,10 @@ void disassociate_ctty(int on_exit)
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 	} else {
-		if (current->signal->tty_old_pgrp) {
-			kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
-			kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+		pid_t old_pgrp = current->signal->tty_old_pgrp;
+		if (old_pgrp) {
+			kill_pg(old_pgrp, SIGHUP, on_exit);
+			kill_pg(old_pgrp, SIGCONT, on_exit);
 		}
 		mutex_unlock(&tty_mutex);
 		unlock_kernel();	
@@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit)
 
 	/* Must lock changes to tty_old_pgrp */
 	mutex_lock(&tty_mutex);
+	lock_task_sighand(current, &flags);
 	current->signal->tty_old_pgrp = 0;
-	tty->session = 0;
-	tty->pgrp = -1;
+
+	/* It is possible that do_tty_hangup has free'd this tty */
+	if (current->signal->tty) {
+		current->signal->tty->session = 0;
+		current->signal->tty->pgrp = 0;
+	} else {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+		       " = NULL", tty);
+#endif
+	}
+	unlock_task_sighand(current, &flags);
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
 	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+		lock_task_sighand(p, &flags);
 		p->signal->tty = NULL;
+		unlock_task_sighand(p, &flags);
 	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
 	read_unlock(&tasklist_lock);
 	mutex_unlock(&tty_mutex);
@@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi
 
 		read_lock(&tasklist_lock);
 		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			lock_task_sighand(p, &flags);
 			p->signal->tty = NULL;
+			unlock_task_sighand(p, &flags);
 		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 		if (o_tty)
 			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+				lock_task_sighand(p, &flags);
 				p->signal->tty = NULL;
+				unlock_task_sighand(p, &flags);
 			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
 		read_unlock(&tasklist_lock);
 	}
@@ -2455,6 +2488,7 @@ static int tty_open(struct inode * inode
 	int index;
 	dev_t device = inode->i_rdev;
 	unsigned short saved_flags = filp->f_flags;
+	unsigned long flags;
 
 	nonseekable_open(inode, filp);
 	
@@ -2466,7 +2500,9 @@ retry_open:
 	mutex_lock(&tty_mutex);
 
 	if (device == MKDEV(TTYAUX_MAJOR,0)) {
+		lock_task_sighand(current, &flags);
 		if (!current->signal->tty) {
+			unlock_task_sighand(current, &flags);
 			mutex_unlock(&tty_mutex);
 			return -ENXIO;
 		}
@@ -2474,6 +2510,7 @@ retry_open:
 		index = current->signal->tty->index;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
+		unlock_task_sighand(current, &flags);
 		goto got_driver;
 	}
 #ifdef CONFIG_VT
@@ -2546,17 +2583,21 @@ got_driver:
 			filp->f_op = &tty_fops;
 		goto retry_open;
 	}
+
+	mutex_lock(&tty_mutex);
+	lock_task_sighand(current, &flags);
 	if (!noctty &&
 	    current->signal->leader &&
 	    !current->signal->tty &&
 	    tty->session == 0) {
-	    	task_lock(current);
 		current->signal->tty = tty;
-		task_unlock(current);
 		current->signal->tty_old_pgrp = 0;
 		tty->session = current->signal->session;
 		tty->pgrp = process_group(current);
 	}
+	/* don't we leak tty in the else case !? */
+	unlock_task_sighand(current, &flags);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2888,6 +2929,7 @@ static int fionbio(struct file *file, in
 static int tiocsctty(struct tty_struct *tty, int arg)
 {
 	struct task_struct *p;
+	unsigned long flags;
 
 	if (current->signal->leader &&
 	    (current->signal->session == tty->session))
@@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct *
 	 */
 	if (!current->signal->leader || current->signal->tty)
 		return -EPERM;
+	mutex_lock(&tty_mutex);
 	if (tty->session > 0) {
 		/*
 		 * This tty is already the controlling
@@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
 
 			read_lock(&tasklist_lock);
 			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+				lock_task_sighand(p, &flags);
 				p->signal->tty = NULL;
+				unlock_task_sighand(p, &flags);
 			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 			read_unlock(&tasklist_lock);
-		} else
+		} else {
+			mutex_unlock(&tty_mutex);
 			return -EPERM;
+		}
 	}
-	mutex_lock(&tty_mutex);
-	task_lock(current);
-	current->signal->tty = tty;
-	task_unlock(current);
-	mutex_unlock(&tty_mutex);
-	current->signal->tty_old_pgrp = 0;
 	tty->session = current->signal->session;
 	tty->pgrp = process_group(current);
+	lock_task_sighand(current, &flags);
+	current->signal->tty = tty;
+	current->signal->tty_old_pgrp = 0;
+	unlock_task_sighand(current, &flags);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2936,7 +2982,7 @@ static int tiocsctty(struct tty_struct *
  *	Obtain the process group of the tty. If there is no process group
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2994,7 +3040,7 @@ static int tiocspgrp(struct tty_struct *
  *	Obtain the session id of the tty. If there is no session
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3139,6 +3185,7 @@ int tty_ioctl(struct inode * inode, stru
 	void __user *p = (void __user *)arg;
 	int retval;
 	struct tty_ldisc *ld;
+	unsigned long flags;
 	
 	tty = (struct tty_struct *)file->private_data;
 	if (tty_paranoia_check(tty, inode, "tty_ioctl"))
@@ -3213,14 +3260,15 @@ int tty_ioctl(struct inode * inode, stru
 			clear_bit(TTY_EXCLUSIVE, &tty->flags);
 			return 0;
 		case TIOCNOTTY:
-			/* FIXME: taks lock or tty_mutex ? */
 			if (current->signal->tty != tty)
 				return -ENOTTY;
 			if (current->signal->leader)
 				disassociate_ctty(0);
-			task_lock(current);
+			mutex_lock(&tty_mutex);
+			lock_task_sighand(current, &flags)
 			current->signal->tty = NULL;
-			task_unlock(current);
+			unlock_task_sighand(current, &flags);
+			mutex_unlock(&tty_mutex);
 			return 0;
 		case TIOCSCTTY:
 			return tiocsctty(tty, arg);
Index: linux-2.6/kernel/auditsc.c
===================================================================
--- linux-2.6.orig/kernel/auditsc.c
+++ linux-2.6/kernel/auditsc.c
@@ -823,6 +823,7 @@ static void audit_log_exit(struct audit_
 				 context->return_code);
 
 	mutex_lock(&tty_mutex);
+	/* XXX doesn't this race like mad? */
 	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
 		tty = tsk->signal->tty->name;
 	else
Index: linux-2.6/kernel/exit.c
===================================================================
--- linux-2.6.orig/kernel/exit.c
+++ linux-2.6/kernel/exit.c
@@ -370,6 +370,7 @@ void daemonize(const char *name, ...)
 	va_list args;
 	struct fs_struct *fs;
 	sigset_t blocked;
+	unsigned long flags;
 
 	va_start(args, name);
 	vsnprintf(current->comm, sizeof(current->comm), name, args);
@@ -384,7 +385,9 @@ void daemonize(const char *name, ...)
 
 	set_special_pids(1, 1);
 	mutex_lock(&tty_mutex);
+	lock_task_sighand(current, &flags);
 	current->signal->tty = NULL;
+	unlock_task_sighand(current, &flags);
 	mutex_unlock(&tty_mutex);
 
 	/* Block and flush all signals */
Index: linux-2.6/kernel/sys.c
===================================================================
--- linux-2.6.orig/kernel/sys.c
+++ linux-2.6/kernel/sys.c
@@ -1483,6 +1483,7 @@ asmlinkage long sys_setsid(void)
 	struct task_struct *group_leader = current->group_leader;
 	pid_t session;
 	int err = -EPERM;
+	unsigned long flags;
 
 	mutex_lock(&tty_mutex);
 	write_lock_irq(&tasklist_lock);
@@ -1504,8 +1505,11 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(session, session);
+	/* XXX move this lock up 2 lines? */
+	lock_task_sighand(group_leader, &flags);
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
+	unlock_task_sighand(group_leader, &flags);
 	err = process_group(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-16  9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra
@ 2006-10-16 13:39 ` Prarit Bhargava
  2006-10-17  8:10 ` Oleg Nesterov
  1 sibling, 0 replies; 10+ messages in thread
From: Prarit Bhargava @ 2006-10-16 13:39 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel



Peter Zijlstra wrote:
> Oleg wrote:
> "Historically ->signal/->sighand (both ptrs and their contents) were globally
> protected by tasklist_lock. 'current' can use these pointers lockless, they
> can't be changed under him.
>
> Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> Unless you are current, you can't lock ->siglock directly (without holding
> tasklist_lock), you should use lock_task_sighand()."
>
> Then, to be consistent with the rest of the kernel, ->signal->tty
> locking should look like so:
>
>   mutex_lock(&tty_mutex)
>     read_lock(&tasklist_lock)
>       lock_task_sighand(p, &flags)
>   

It would be nice if we could clean up some of the complicated locking in 
this code.  For example, from do_tty_hangup,

 *
 *      Locking:
 *              BKL
 *              redirect lock for undoing redirection
 *              file list lock for manipulating list of ttys
 *              tty_ldisc_lock from called functions
 *              termios_sem resetting termios data
 *              tasklist_lock to walk task list for hangup event

P.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-16  9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra
  2006-10-16 13:39 ` Prarit Bhargava
@ 2006-10-17  8:10 ` Oleg Nesterov
  2006-10-17 10:17   ` Peter Zijlstra
  1 sibling, 1 reply; 10+ messages in thread
From: Oleg Nesterov @ 2006-10-17  8:10 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox

On 10/16, Peter Zijlstra wrote:
>
> Oleg wrote:
> "Historically ->signal/->sighand (both ptrs and their contents) were globally
> protected by tasklist_lock. 'current' can use these pointers lockless, they
> can't be changed under him.
> 
> Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> Unless you are current, you can't lock ->siglock directly (without holding
> tasklist_lock), you should use lock_task_sighand()."
> 
> Then, to be consistent with the rest of the kernel, ->signal->tty
> locking should look like so:
> 
>   mutex_lock(&tty_mutex)
>     read_lock(&tasklist_lock)
>       lock_task_sighand(p, &flags)

I've also started similar patches, but have no time to finish it.

I don't think we need tasklist_lock. I think ->sighand->siglock is enough.

So do_task_stat() doesn't need to take tty_mutex at all.

However, tty_mutex protects ->tty from release_dev(tty), so it is also
possible to do:

	mutex_lock(&tty_mutex);
	tty = task->signal->tty;
	barrier();
	if (tty) {
		// ->tty could be changed/cleared from under us,
		// but it can't be released while we are holding
		// tty_mutex
		do_something(tty);
	}
	...

But first I think we should kill task_lock(). This is changelog for
the first patch in unfinished series:

	Taking task_lock() when updating/using signal->tty doesn't help because

		- it is used only in some random places

		- signal->tty is per-process, while ->alloc_lock is per-thread

	We can improve this if we take task_lock(task->group_leader), but I think
	this is not the best option and we should use sighand->siglock instead,
	because

		- task_lock() interacts badly with write_lock_irq(&tasklist_lock),
		  sys_setsid() won't be happy.

		- unless we are 'current' or tasklist_lock is held, we anyway need
		  ->siglock to access ->signal. Actually, even reading ->group_leader
		  is not safe unless we know the task was not released.

		- most of signal_struct's contents is already protected by ->siglock,
		  why ->tty isn't?

> However, lock_task_sighand(), is a conditional lock, p might not have a
> ->sighand, in which case it returns NULL. What would that mean for
> ->signal, can I then still modify it?

This means the task has already dead, it doesn't have ->signal.

> struct sighand_struct *sighand = lock_task_sighand(p, &flags);
> if (sighand) {
> 	/* modify/use ->signal->tty */
> 	unlock_task_sighand(p, &flags);
> } else
> 	/* now what !? */

see above.

> The same problem appears in fs/proc/array.c:do_task_stat(), there the
> locking doesn't look quite right either.

Hmm. I think do_task_stat() is fine, it does nothing when lock_task_sighand()
fails.

> --- linux-2.6.orig/drivers/char/tty_io.c
> +++ linux-2.6/drivers/char/tty_io.c
> @@ -63,6 +63,12 @@
>   *
>   * Move do_SAK() into process context.  Less stack use in devfs functions.
>   * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
> + *
> + * A word on (struct task)::signal->tty locking
> + *
> + *   mutex_lock(&tty_mutex)
> + *     read_lock(&tasklist_lock)
> + *       lock_task_sighand()
>   */
>  
>  #include <linux/types.h>
> @@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data)
>  	struct task_struct *p;
>  	struct tty_ldisc *ld;
>  	int    closecount = 0, n;
> +	unsigned long flags;
>  
>  	if (!tty)
>  		return;
> @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
>  	  This should get done automatically when the port closes and
>  	  tty_release is called */
>  	
> +	mutex_lock(&tty_mutex);

I am not sure it is needed.

>  	read_lock(&tasklist_lock);
>  	if (tty->session > 0) {
>  		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> +			lock_task_sighand(p, &flags);
>  			if (p->signal->tty == tty)
>  				p->signal->tty = NULL;
> +			unlock_task_sighand(p, &flags);

We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).

We are holding tasklist_lock. This means that all tasks found by
do_each_task_pid() have a valid ->signal/->sighand != NULL.
tasklist_lock protects against release_task()->__exit_signal() and
from changing ->sighand by de_thread().

> @@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit)
>  
>  	/* Must lock changes to tty_old_pgrp */
>  	mutex_lock(&tty_mutex);
> +	lock_task_sighand(current, &flags);

Again, we can use spin_lock_irq(current->signal->siglock). It is safe to
use current->sighand directly, it can't be freed or changed from under us.

>  	current->signal->tty_old_pgrp = 0;
> -	tty->session = 0;
> -	tty->pgrp = -1;
> +
> +	/* It is possible that do_tty_hangup has free'd this tty */
> +	if (current->signal->tty) {
> +		current->signal->tty->session = 0;
> +		current->signal->tty->pgrp = 0;
> +	} else {
> +#ifdef TTY_DEBUG_HANGUP
> +		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
> +		       " = NULL", tty);
> +#endif
> +	}
> +	unlock_task_sighand(current, &flags);
>  
>  	/* Now clear signal->tty under the lock */
>  	read_lock(&tasklist_lock);
>  	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
> +		lock_task_sighand(p, &flags);
>  		p->signal->tty = NULL;
> +		unlock_task_sighand(p, &flags);
>  	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
>  	read_unlock(&tasklist_lock);
>  	mutex_unlock(&tty_mutex);
> @@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi
>  
>  		read_lock(&tasklist_lock);
>  		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> +			lock_task_sighand(p, &flags);
>  			p->signal->tty = NULL;
> +			unlock_task_sighand(p, &flags);
>  		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
>  		if (o_tty)
>  			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
> +				lock_task_sighand(p, &flags);
>  				p->signal->tty = NULL;
> +				unlock_task_sighand(p, &flags);
>  			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
>  		read_unlock(&tasklist_lock);

While doing a similar changes, I introduced a couple of simple
helpers:

	static inline void proc_clear_tty(struct task_struct *p)
	{
		spin_lock_irq(&p->sighand->siglock);
		p->signal->tty = NULL;
		spin_unlock_irq(&p->sighand->siglock);
	}

	static void session_clear_tty(pid_t session)
	{
		struct task_struct *p;

		do_each_task_pid(session, PIDTYPE_SID, p) {
			proc_clear_tty(p);
		} while_each_task_pid(session, PIDTYPE_SID, p);
	}

I think it makes sense.

>  static int tiocsctty(struct tty_struct *tty, int arg)
>  {
>  	struct task_struct *p;
> +	unsigned long flags;
>  
>  	if (current->signal->leader &&
>  	    (current->signal->session == tty->session))
> @@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct *
>  	 */
>  	if (!current->signal->leader || current->signal->tty)
>  		return -EPERM;
> +	mutex_lock(&tty_mutex);
>  	if (tty->session > 0) {
>  		/*
>  		 * This tty is already the controlling
> @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
>  
>  			read_lock(&tasklist_lock);
>  			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> +				lock_task_sighand(p, &flags);
>  				p->signal->tty = NULL;
> +				unlock_task_sighand(p, &flags);
>  			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
>  			read_unlock(&tasklist_lock);
> -		} else
> +		} else {
> +			mutex_unlock(&tty_mutex);
>  			return -EPERM;
> +		}
>  	}
> -	mutex_lock(&tty_mutex);
> -	task_lock(current);
> -	current->signal->tty = tty;
> -	task_unlock(current);
> -	mutex_unlock(&tty_mutex);
> -	current->signal->tty_old_pgrp = 0;
>  	tty->session = current->signal->session;
>  	tty->pgrp = process_group(current);
> +	lock_task_sighand(current, &flags);
> +	current->signal->tty = tty;
> +	current->signal->tty_old_pgrp = 0;
> +	unlock_task_sighand(current, &flags);
> +	mutex_unlock(&tty_mutex);
>  	return 0;
>  }

There is a very similar code in tty_open(), probably we need another
helper, proc_set_tty().

But I am not sure about locking. I think we should check
->signal->leader/->signal->tty and set ->tty in proc_set_tty()
under ->siglock, this way we can remove tty_mutex from sys_setsid().

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17  8:10 ` Oleg Nesterov
@ 2006-10-17 10:17   ` Peter Zijlstra
  2006-10-17 12:33     ` Oleg Nesterov
  0 siblings, 1 reply; 10+ messages in thread
From: Peter Zijlstra @ 2006-10-17 10:17 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: linux-kernel, Prarit Bhargava, Alan Cox

On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote:
> On 10/16, Peter Zijlstra wrote:
> >
> > Oleg wrote:
> > "Historically ->signal/->sighand (both ptrs and their contents) were globally
> > protected by tasklist_lock. 'current' can use these pointers lockless, they
> > can't be changed under him.
> > 
> > Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
> > Unless you are current, you can't lock ->siglock directly (without holding
> > tasklist_lock), you should use lock_task_sighand()."
> > 
> > Then, to be consistent with the rest of the kernel, ->signal->tty
> > locking should look like so:
> > 
> >   mutex_lock(&tty_mutex)
> >     read_lock(&tasklist_lock)
> >       lock_task_sighand(p, &flags)
> 
> I've also started similar patches, but have no time to finish it.
> 
> I don't think we need tasklist_lock. I think ->sighand->siglock is enough.

Right, sys_unshare() makes tasklist_lock meaningless wrt ->siglock.

> So do_task_stat() doesn't need to take tty_mutex at all.
> 
> However, tty_mutex protects ->tty from release_dev(tty), so it is also
> possible to do:
> 
> 	mutex_lock(&tty_mutex);
> 	tty = task->signal->tty;
> 	barrier();
> 	if (tty) {
> 		// ->tty could be changed/cleared from under us,
> 		// but it can't be released while we are holding
> 		// tty_mutex
> 		do_something(tty);
> 	}
> 	...

Nice, I think we have to convert all those callers like sys_vhangup() to
this form.


> > @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
> >  	  This should get done automatically when the port closes and
> >  	  tty_release is called */
> >  	
> > +	mutex_lock(&tty_mutex);
> 
> I am not sure it is needed.

Right, this would only be needed when using the tty, not when changing
signal->tty.

> >  	read_lock(&tasklist_lock);
> >  	if (tty->session > 0) {
> >  		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> > +			lock_task_sighand(p, &flags);
> >  			if (p->signal->tty == tty)
> >  				p->signal->tty = NULL;
> > +			unlock_task_sighand(p, &flags);
> 
> We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).
> 
> We are holding tasklist_lock. This means that all tasks found by
> do_each_task_pid() have a valid ->signal/->sighand != NULL.
> tasklist_lock protects against release_task()->__exit_signal() and
> from changing ->sighand by de_thread().

I think sys_unshare() spoils the game here; it changes ->sighand in
midair without holding tasklist_lock. So any ->sighand but current's is
fair game.

Hmm, either sys_unshare() is broken in that it doesn't take the
tasklist_lock or a lot of other code is broken.

let us take send_sig_info() vs. sys_unshare()

      1                                        2

read_lock(&tasklist_lock)
spin_lock_irqsave(&p->sighand->siglock, flags);

                         rcu_assign_pointer(current->sighand, new_sigh)

spin_unlock_irqsave(&p->sighand->siglock, flags);
read_unlock(&tasklist_lock);

what happens when 2's current is 1's p....

> > @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
> >  
> >  			read_lock(&tasklist_lock);
> >  			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> > +				lock_task_sighand(p, &flags);
> >  				p->signal->tty = NULL;
> > +				unlock_task_sighand(p, &flags);
> >  			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
> >  			read_unlock(&tasklist_lock);
> > -		} else
> > +		} else {
> > +			mutex_unlock(&tty_mutex);
> >  			return -EPERM;
> > +		}
> >  	}
> > -	mutex_lock(&tty_mutex);
> > -	task_lock(current);
> > -	current->signal->tty = tty;
> > -	task_unlock(current);
> > -	mutex_unlock(&tty_mutex);
> > -	current->signal->tty_old_pgrp = 0;
> >  	tty->session = current->signal->session;
> >  	tty->pgrp = process_group(current);
> > +	lock_task_sighand(current, &flags);
> > +	current->signal->tty = tty;
> > +	current->signal->tty_old_pgrp = 0;
> > +	unlock_task_sighand(current, &flags);
> > +	mutex_unlock(&tty_mutex);
> >  	return 0;
> >  }
> 
> There is a very similar code in tty_open(), probably we need another
> helper, proc_set_tty().
> 
> But I am not sure about locking. I think we should check
> ->signal->leader/->signal->tty and set ->tty in proc_set_tty()
> under ->siglock, this way we can remove tty_mutex from sys_setsid().

Right, use tty_mutex when using the tty, use ->sighand when changing
signal->tty.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 10:17   ` Peter Zijlstra
@ 2006-10-17 12:33     ` Oleg Nesterov
  2006-10-17 13:00       ` Peter Zijlstra
  2006-10-17 13:29       ` Alan Cox
  0 siblings, 2 replies; 10+ messages in thread
From: Oleg Nesterov @ 2006-10-17 12:33 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox

On 10/17, Peter Zijlstra wrote:
>
> On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote:
> > 
> > We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock).
> > 
> > We are holding tasklist_lock. This means that all tasks found by
> > do_each_task_pid() have a valid ->signal/->sighand != NULL.
> > tasklist_lock protects against release_task()->__exit_signal() and
> > from changing ->sighand by de_thread().
> 
> I think sys_unshare() spoils the game here; it changes ->sighand in
> midair without holding tasklist_lock. So any ->sighand but current's is
> fair game.
> 
> Hmm, either sys_unshare() is broken in that it doesn't take the
> tasklist_lock or a lot of other code is broken.

Yes, it is broken, please look at
	http://marc.theaimsgroup.com/?t=114253118100003

I sent a patch,
	http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193

but it was ignored. Probably I should re-send it.

> Right, use tty_mutex when using the tty, use ->sighand when changing
> signal->tty.

I think that things like do_task_stat()/do_acct_process() do not need
global tty_mutex, they can use ->siglock.

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 12:33     ` Oleg Nesterov
@ 2006-10-17 13:00       ` Peter Zijlstra
  2006-10-17 14:08         ` Alan Cox
  2006-10-18 16:55         ` Oleg Nesterov
  2006-10-17 13:29       ` Alan Cox
  1 sibling, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2006-10-17 13:00 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: linux-kernel, Prarit Bhargava, Alan Cox

On Tue, 2006-10-17 at 16:33 +0400, Oleg Nesterov wrote:
> On 10/17, Peter Zijlstra wrote:

> > Hmm, either sys_unshare() is broken in that it doesn't take the
> > tasklist_lock or a lot of other code is broken.
> 
> Yes, it is broken, please look at

Ah good :-)

How about something like this; I'm still shaky on the lifetime rules of
tty objects, I'm about to add a refcount and spinlock/mutex to
tty_struct, this is madness....

---
 arch/sparc64/solaris/misc.c |    4 -
 arch/um/kernel/exec.c       |    7 +-
 drivers/char/tty_io.c       |  126 ++++++++++++++++++++++++--------------------
 drivers/s390/char/fs3270.c  |   10 ++-
 fs/dquot.c                  |   14 ++--
 fs/open.c                   |    1 
 include/linux/tty.h         |   28 +++++++++
 kernel/acct.c               |    9 +--
 kernel/auditsc.c            |    2 
 kernel/exit.c               |    4 -
 kernel/sys.c                |    6 +-
 security/selinux/hooks.c    |   15 +++--
 12 files changed, 141 insertions(+), 85 deletions(-)

Index: linux-2.6.18.noarch/drivers/char/tty_io.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/char/tty_io.c
+++ linux-2.6.18.noarch/drivers/char/tty_io.c
@@ -126,7 +126,7 @@ EXPORT_SYMBOL(tty_std_termios);
    
 LIST_HEAD(tty_drivers);			/* linked list of tty drivers */
 
-/* Semaphore to protect creating and releasing a tty. This is shared with
+/* Mutex to protect creating and releasing a tty. This is shared with
    vt.c for deeply disgusting hack reasons */
 DEFINE_MUTEX(tty_mutex);
 EXPORT_SYMBOL(tty_mutex);
@@ -616,7 +616,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_strin
  *	they are not on hot paths so a little discipline won't do 
  *	any harm.
  *
- *	Locking: takes termios_sem
+ *	Locking: takes termios_mutex
  */
  
 static void tty_set_termios_ldisc(struct tty_struct *tty, int num)
@@ -917,7 +917,7 @@ static void tty_ldisc_enable(struct tty_
  *	context.
  *
  *	Locking: takes tty_ldisc_lock.
- *		called functions take termios_sem
+ *		 called functions take termios_mutex
  */
  
 static int tty_set_ldisc(struct tty_struct *tty, int ldisc)
@@ -1269,12 +1269,12 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush);
  *
  *	Locking:
  *		BKL
- *		redirect lock for undoing redirection
- *		file list lock for manipulating list of ttys
- *		tty_ldisc_lock from called functions
- *		termios_sem resetting termios data
- *		tasklist_lock to walk task list for hangup event
- *
+ *		  redirect lock for undoing redirection
+ *		  file list lock for manipulating list of ttys
+ *		  tty_ldisc_lock from called functions
+ *		  termios_mutex resetting termios data
+ *		  tasklist_lock to walk task list for hangup event
+ *		    ->siglock to protect ->signal/->sighand
  */
 static void do_tty_hangup(void *data)
 {
@@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data)
 	read_lock(&tasklist_lock);
 	if (tty->session > 0) {
 		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+			spin_lock_irq(&p->sighand->siglock);
 			if (p->signal->tty == tty)
 				p->signal->tty = NULL;
-			if (!p->signal->leader)
+			if (!p->signal->leader) {
+				spin_unlock_irq(&p->sighand->siglock);
 				continue;
-			group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
-			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+			}
+			__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+			__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
 			if (tty->pgrp > 0)
 				p->signal->tty_old_pgrp = tty->pgrp;
+			spin_unlock_irq(&p->sighand->siglock);
 		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
 	}
 	read_unlock(&tasklist_lock);
@@ -1470,10 +1474,12 @@ EXPORT_SYMBOL(tty_hung_up_p);
  *	The argument on_exit is set to 1 if called when a process is
  *	exiting; it is 0 if called by the ioctl TIOCNOTTY.
  *
- *	Locking: tty_mutex is taken to protect current->signal->tty
+ *	Locking:
  *		BKL is taken for hysterical raisins
- *		Tasklist lock is taken (under tty_mutex) to walk process
- *		lists for the session.
+ *		  tty_mutex is taken to protect tty
+ *		  ->siglock is taken to protect ->signal/->sighand
+ *		  tasklist_lock is taken to walk process list for sessions
+ *		    ->siglock is taken to protect ->signal/->sighand
  */
 
 void disassociate_ctty(int on_exit)
@@ -1481,20 +1487,23 @@ void disassociate_ctty(int on_exit)
 	struct tty_struct *tty;
 	struct task_struct *p;
 	int tty_pgrp = -1;
+	int session;
 
 	lock_kernel();
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = current_get_tty();
 	if (tty) {
 		tty_pgrp = tty->pgrp;
 		mutex_unlock(&tty_mutex);
+		/* XXX: here we race, there is nothing protecting tty */
 		if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
 			tty_vhangup(tty);
 	} else {
-		if (current->signal->tty_old_pgrp) {
-			kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
-			kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+		pid_t old_pgrp = current->signal->tty_old_pgrp;
+		if (old_pgrp) {
+			kill_pg(old_pgrp, SIGHUP, on_exit);
+			kill_pg(old_pgrp, SIGCONT, on_exit);
 		}
 		mutex_unlock(&tty_mutex);
 		unlock_kernel();	
@@ -1506,19 +1515,29 @@ void disassociate_ctty(int on_exit)
 			kill_pg(tty_pgrp, SIGCONT, on_exit);
 	}
 
-	/* Must lock changes to tty_old_pgrp */
-	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	current->signal->tty_old_pgrp = 0;
-	tty->session = 0;
-	tty->pgrp = -1;
+	session = current->signal->session;
+	spin_unlock_irq(&current->sighand->siglock);
+
+	mutex_lock(&tty_mutex);
+	/* It is possible that do_tty_hangup has free'd this tty */
+	tty = current_get_tty();
+	if (tty) {
+		tty->session = 0;
+		tty->pgrp = 0;
+	} else {
+#ifdef TTY_DEBUG_HANGUP
+		printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+		       " = NULL", tty);
+#endif
+	}
+	mutex_unlock(&tty_mutex);
 
 	/* Now clear signal->tty under the lock */
 	read_lock(&tasklist_lock);
-	do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
-		p->signal->tty = NULL;
-	} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
+	session_clear_tty(session);
 	read_unlock(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	unlock_kernel();
 }
 
@@ -2340,13 +2359,9 @@ static void release_dev(struct file * fi
 		struct task_struct *p;
 
 		read_lock(&tasklist_lock);
-		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-			p->signal->tty = NULL;
-		} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+		session_clear_tty(tty->session);
 		if (o_tty)
-			do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
+			session_clear_tty(o_tty->session);
 		read_unlock(&tasklist_lock);
 	}
 
@@ -2467,12 +2482,13 @@ retry_open:
 	mutex_lock(&tty_mutex);
 
 	if (device == MKDEV(TTYAUX_MAJOR,0)) {
-		if (!current->signal->tty) {
+		tty = current_get_tty();
+		if (!tty) {
 			mutex_unlock(&tty_mutex);
 			return -ENXIO;
 		}
-		driver = current->signal->tty->driver;
-		index = current->signal->tty->index;
+		driver = tty->driver;
+		index = tty->index;
 		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
 		/* noctty = 1; */
 		goto got_driver;
@@ -2547,17 +2563,21 @@ got_driver:
 			filp->f_op = &tty_fops;
 		goto retry_open;
 	}
+
+	mutex_lock(&tty_mutex);
+	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
 	    !current->signal->tty &&
 	    tty->session == 0) {
-	    	task_lock(current);
 		current->signal->tty = tty;
-		task_unlock(current);
 		current->signal->tty_old_pgrp = 0;
 		tty->session = current->signal->session;
 		tty->pgrp = process_group(current);
 	}
+	/* don't we leak tty in the else case !? */
+	spin_unlock_irq(&current->sighand->siglock);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct *
 	 */
 	if (!current->signal->leader || current->signal->tty)
 		return -EPERM;
+	mutex_lock(&tty_mutex);
 	if (tty->session > 0) {
 		/*
 		 * This tty is already the controlling
@@ -2908,23 +2929,21 @@ static int tiocsctty(struct tty_struct *
 			/*
 			 * Steal it away
 			 */
-
 			read_lock(&tasklist_lock);
-			do_each_task_pid(tty->session, PIDTYPE_SID, p) {
-				p->signal->tty = NULL;
-			} while_each_task_pid(tty->session, PIDTYPE_SID, p);
+			session_clear_tty(tty->session);
 			read_unlock(&tasklist_lock);
-		} else
+		} else {
+			mutex_unlock(&tty_mutex);
 			return -EPERM;
+		}
 	}
-	mutex_lock(&tty_mutex);
-	task_lock(current);
-	current->signal->tty = tty;
-	task_unlock(current);
-	mutex_unlock(&tty_mutex);
-	current->signal->tty_old_pgrp = 0;
+	spin_lock_irq(&current->sighand->siglock);
 	tty->session = current->signal->session;
 	tty->pgrp = process_group(current);
+	current->signal->tty = tty;
+	current->signal->tty_old_pgrp = 0;
+	spin_unlock_irq(&current->sighand->siglock);
+	mutex_unlock(&tty_mutex);
 	return 0;
 }
 
@@ -2937,7 +2956,7 @@ static int tiocsctty(struct tty_struct *
  *	Obtain the process group of the tty. If there is no process group
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2995,7 +3014,7 @@ static int tiocspgrp(struct tty_struct *
  *	Obtain the session id of the tty. If there is no session
  *	return an error.
  *
- *	Locking: none. Reference to ->signal->tty is safe.
+ *	Locking: none. Reference to current->signal->tty is safe.
  */
 
 static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3214,14 +3233,11 @@ int tty_ioctl(struct inode * inode, stru
 			clear_bit(TTY_EXCLUSIVE, &tty->flags);
 			return 0;
 		case TIOCNOTTY:
-			/* FIXME: taks lock or tty_mutex ? */
 			if (current->signal->tty != tty)
 				return -ENOTTY;
 			if (current->signal->leader)
 				disassociate_ctty(0);
-			task_lock(current);
-			current->signal->tty = NULL;
-			task_unlock(current);
+			proc_set_tty(current, NULL);
 			return 0;
 		case TIOCSCTTY:
 			return tiocsctty(tty, arg);
Index: linux-2.6.18.noarch/kernel/auditsc.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/auditsc.c
+++ linux-2.6.18.noarch/kernel/auditsc.c
@@ -819,10 +819,12 @@ static void audit_log_exit(struct audit_
 				 context->return_code);
 
 	mutex_lock(&tty_mutex);
+	read_lock(&tasklist_lock);
 	if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
 		tty = tsk->signal->tty->name;
 	else
 		tty = "(none)";
+	read_unlock(&tasklist_lock);
 	audit_log_format(ab,
 		  " a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
 		  " ppid=%d pid=%d auid=%u uid=%u gid=%u"
Index: linux-2.6.18.noarch/kernel/exit.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/exit.c
+++ linux-2.6.18.noarch/kernel/exit.c
@@ -382,9 +382,7 @@ void daemonize(const char *name, ...)
 	exit_mm(current);
 
 	set_special_pids(1, 1);
-	mutex_lock(&tty_mutex);
-	current->signal->tty = NULL;
-	mutex_unlock(&tty_mutex);
+	proc_set_tty(current, NULL);
 
 	/* Block and flush all signals */
 	sigfillset(&blocked);
Index: linux-2.6.18.noarch/kernel/sys.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/sys.c
+++ linux-2.6.18.noarch/kernel/sys.c
@@ -1379,7 +1379,6 @@ asmlinkage long sys_setsid(void)
 	pid_t session;
 	int err = -EPERM;
 
-	mutex_lock(&tty_mutex);
 	write_lock_irq(&tasklist_lock);
 
 	/* Fail if I am already a session leader */
@@ -1399,12 +1398,15 @@ asmlinkage long sys_setsid(void)
 
 	group_leader->signal->leader = 1;
 	__set_special_pids(session, session);
+
+	spin_lock(&group_leader->sighand->siglock);
 	group_leader->signal->tty = NULL;
 	group_leader->signal->tty_old_pgrp = 0;
+	spin_unlock(&group_leader->sighand->siglock);
+
 	err = process_group(group_leader);
 out:
 	write_unlock_irq(&tasklist_lock);
-	mutex_unlock(&tty_mutex);
 	return err;
 }
 
Index: linux-2.6.18.noarch/arch/sparc64/solaris/misc.c
===================================================================
--- linux-2.6.18.noarch.orig/arch/sparc64/solaris/misc.c
+++ linux-2.6.18.noarch/arch/sparc64/solaris/misc.c
@@ -423,9 +423,7 @@ asmlinkage int solaris_procids(int cmd, 
 			   Solaris setpgrp and setsid? */
 			ret = sys_setpgid(0, 0);
 			if (ret) return ret;
-			mutex_lock(&tty_mutex);
-			current->signal->tty = NULL;
-			mutex_unlock(&tty_mutex);
+			proc_set_tty(current, NULL);
 			return process_group(current);
 		}
 	case 2: /* getsid */
Index: linux-2.6.18.noarch/arch/um/kernel/exec.c
===================================================================
--- linux-2.6.18.noarch.orig/arch/um/kernel/exec.c
+++ linux-2.6.18.noarch/arch/um/kernel/exec.c
@@ -39,12 +39,13 @@ static long execve1(char *file, char __u
 		    char __user *__user *env)
 {
         long error;
+	struct tty_struct *tty;
 
 #ifdef CONFIG_TTY_LOG
 	mutex_lock(&tty_mutex);
-	task_lock(current);	/* FIXME:  is this needed ? */
-	log_exec(argv, current->signal->tty);
-	task_unlock(current);
+	tty = current_get_tty();
+	if (tty)
+		log_exec(argv, tty);
 	mutex_unlock(&tty_mutex);
 #endif
         error = do_execve(file, argv, env, &current->thread.regs);
Index: linux-2.6.18.noarch/drivers/s390/char/fs3270.c
===================================================================
--- linux-2.6.18.noarch.orig/drivers/s390/char/fs3270.c
+++ linux-2.6.18.noarch/drivers/s390/char/fs3270.c
@@ -425,11 +425,15 @@ fs3270_open(struct inode *inode, struct 
 	minor = iminor(filp->f_dentry->d_inode);
 	/* Check for minor 0 multiplexer. */
 	if (minor == 0) {
-		if (!current->signal->tty)
+		struct tty_struct *tty;
+		mutex_lock(&tty_mutex);
+		tty = current_get_tty();
+		if (!tty)
 			return -ENODEV;
-		if (current->signal->tty->driver->major != IBM_TTY3270_MAJOR)
+		if (tty->driver->major != IBM_TTY3270_MAJOR)
 			return -ENODEV;
-		minor = current->signal->tty->index + RAW3270_FIRSTMINOR;
+		minor = tty->index + RAW3270_FIRSTMINOR;
+		mutex_unlock(&tty_mutex);
 	}
 	/* Check if some other program is already using fullscreen mode. */
 	fp = (struct fs3270 *) raw3270_find_view(&fs3270_fn, minor);
Index: linux-2.6.18.noarch/fs/dquot.c
===================================================================
--- linux-2.6.18.noarch.orig/fs/dquot.c
+++ linux-2.6.18.noarch/fs/dquot.c
@@ -828,6 +828,7 @@ static inline int need_print_warning(str
 static void print_warning(struct dquot *dquot, const char warntype)
 {
 	char *msg = NULL;
+	struct tty_struct *tty;
 	int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B :
 	  ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0);
 
@@ -835,14 +836,15 @@ static void print_warning(struct dquot *
 		return;
 
 	mutex_lock(&tty_mutex);
-	if (!current->signal->tty)
+	tty = current_get_tty();
+	if (!tty)
 		goto out_lock;
-	tty_write_message(current->signal->tty, dquot->dq_sb->s_id);
+	tty_write_message(tty, dquot->dq_sb->s_id);
 	if (warntype == ISOFTWARN || warntype == BSOFTWARN)
-		tty_write_message(current->signal->tty, ": warning, ");
+		tty_write_message(tty, ": warning, ");
 	else
-		tty_write_message(current->signal->tty, ": write failed, ");
-	tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]);
+		tty_write_message(tty, ": write failed, ");
+	tty_write_message(tty, quotatypes[dquot->dq_type]);
 	switch (warntype) {
 		case IHARDWARN:
 			msg = " file limit reached.\r\n";
@@ -863,7 +865,7 @@ static void print_warning(struct dquot *
 			msg = " block quota exceeded.\r\n";
 			break;
 	}
-	tty_write_message(current->signal->tty, msg);
+	tty_write_message(tty, msg);
 out_lock:
 	mutex_unlock(&tty_mutex);
 }
Index: linux-2.6.18.noarch/fs/open.c
===================================================================
--- linux-2.6.18.noarch.orig/fs/open.c
+++ linux-2.6.18.noarch/fs/open.c
@@ -1204,6 +1204,7 @@ EXPORT_SYMBOL(sys_close);
 asmlinkage long sys_vhangup(void)
 {
 	if (capable(CAP_SYS_TTY_CONFIG)) {
+		/* XXX: this needs locking */
 		tty_vhangup(current->signal->tty);
 		return 0;
 	}
Index: linux-2.6.18.noarch/include/linux/tty.h
===================================================================
--- linux-2.6.18.noarch.orig/include/linux/tty.h
+++ linux-2.6.18.noarch/include/linux/tty.h
@@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt
 	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
 }
 
+static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty)
+{
+	spin_lock_irq(&p->sighand->siglock);
+	p->signal->tty = tty;
+	spin_unlock_irq(&p->sighand->siglock);
+}
+
+static inline void session_clear_tty(pid_t session)
+{
+	struct task_struct *p;
+	do_each_task_pid(session, PIDTYPE_SID, p) {
+		proc_set_tty(p, NULL);
+	} while_each_task_pid(session, PIDTYPE_SID, p);
+}
+
+static inline void current_get_tty(void)
+{
+	struct tty_struct *tty;
+	WARN_ON_ONCE(!mutex_locked(&tty_mutex));
+	tty = current->session->tty;
+	barrier();
+	/*
+	 * ->tty could be changed/cleared from under us, but it can't be
+	 * released while we are holding tty_mutex
+	 */
+	return tty;
+}
+
 #endif /* __KERNEL__ */
 #endif
Index: linux-2.6.18.noarch/kernel/acct.c
===================================================================
--- linux-2.6.18.noarch.orig/kernel/acct.c
+++ linux-2.6.18.noarch/kernel/acct.c
@@ -427,6 +427,7 @@ static void do_acct_process(struct file 
 	u64 elapsed;
 	u64 run_time;
 	struct timespec uptime;
+	struct tty_struct *tty;
 
 	/*
 	 * First check to see if there is enough free_space to continue
@@ -484,12 +485,8 @@ static void do_acct_process(struct file 
 #endif
 
 	mutex_lock(&tty_mutex);
-	/* FIXME: Whoever is responsible for current->signal locking needs
-	   to use the same locking all over the kernel and document it */
-	read_lock(&tasklist_lock);
-	ac.ac_tty = current->signal->tty ?
-		old_encode_dev(tty_devnum(current->signal->tty)) : 0;
-	read_unlock(&tasklist_lock);
+	tty = current_get_tty();
+	ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
 	mutex_unlock(&tty_mutex);
 
 	spin_lock_irq(&current->sighand->siglock);
Index: linux-2.6.18.noarch/security/selinux/hooks.c
===================================================================
--- linux-2.6.18.noarch.orig/security/selinux/hooks.c
+++ linux-2.6.18.noarch/security/selinux/hooks.c
@@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi
 	struct tty_struct *tty;
 	struct fdtable *fdt;
 	long j = -1;
+	int drop_tty = 0;
 
 	mutex_lock(&tty_mutex);
-	tty = current->signal->tty;
+	tty = current_get_tty();
 	if (tty) {
 		file_list_lock();
 		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
@@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi
 			struct inode *inode = file->f_dentry->d_inode;
 			if (inode_has_perm(current, inode,
 					   FILE__READ | FILE__WRITE, NULL)) {
-				/* Reset controlling tty. */
-				current->signal->tty = NULL;
-				current->signal->tty_old_pgrp = 0;
+				drop_tty = 1;
 			}
 		}
 		file_list_unlock();
+
+		if (drop_tty) {
+			/* Reset controlling tty. */
+			spin_lock_irq(&current->sighand->siglock);
+			current->signal->tty = NULL;
+			current->signal->tty_old_pgrp = 0;
+			spin_unlock_irq(&current->sighand->siglock);
+		}
 	}
 	mutex_unlock(&tty_mutex);
 



^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 12:33     ` Oleg Nesterov
  2006-10-17 13:00       ` Peter Zijlstra
@ 2006-10-17 13:29       ` Alan Cox
  2006-10-18 17:21         ` Oleg Nesterov
  1 sibling, 1 reply; 10+ messages in thread
From: Alan Cox @ 2006-10-17 13:29 UTC (permalink / raw)
  To: Oleg Nesterov; +Cc: Peter Zijlstra, linux-kernel, Prarit Bhargava

Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov:
> I sent a patch,
> 	http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193
> 
> but it was ignored. Probably I should re-send it.

Definitely - we still see reports of tty slab scribbles

> > Right, use tty_mutex when using the tty, use ->sighand when changing
> > signal->tty.
> 
> I think that things like do_task_stat()/do_acct_process() do not need
> global tty_mutex, they can use ->siglock.

Please keep the tty_mutex as it will protect against other stuff later.
Once tty is a bit saner then someone brave can refcount it properly and
that'll make it much prettier.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 13:00       ` Peter Zijlstra
@ 2006-10-17 14:08         ` Alan Cox
  2006-10-18 16:55         ` Oleg Nesterov
  1 sibling, 0 replies; 10+ messages in thread
From: Alan Cox @ 2006-10-17 14:08 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Oleg Nesterov, linux-kernel, Prarit Bhargava

Ar Maw, 2006-10-17 am 15:00 +0200, ysgrifennodd Peter Zijlstra:
> How about something like this; I'm still shaky on the lifetime rules of
> tty objects, I'm about to add a refcount and spinlock/mutex to
> tty_struct, this is madness....

It has two already.

I wouldn't worry about being shaky about the lifetime rules, thats a
forensics job at the moment.

> +	tty = current_get_tty();

Sensible way to go whichever path we use and once it returns a refcount
bumped object in future it'll clean up a lot more. get_current_tty or
just current_tty() would fit the naming in the kernel better


I agree entirely with the path this patch is taking.

Alan


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 13:00       ` Peter Zijlstra
  2006-10-17 14:08         ` Alan Cox
@ 2006-10-18 16:55         ` Oleg Nesterov
  1 sibling, 0 replies; 10+ messages in thread
From: Oleg Nesterov @ 2006-10-18 16:55 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox

On 10/17, Peter Zijlstra wrote:
>
> How about something like this; I'm still shaky on the lifetime rules of
> tty objects, I'm about to add a refcount and spinlock/mutex to
> tty_struct, this is madness....

Sorry for delay, a couple of minor nits...

>  static void do_tty_hangup(void *data)
>  {
> @@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data)
>  	read_lock(&tasklist_lock);
>  	if (tty->session > 0) {
>  		do_each_task_pid(tty->session, PIDTYPE_SID, p) {
> +			spin_lock_irq(&p->sighand->siglock);
>  			if (p->signal->tty == tty)
>  				p->signal->tty = NULL;
> -			if (!p->signal->leader)
> +			if (!p->signal->leader) {
> +				spin_unlock_irq(&p->sighand->siglock);
>  				continue;
> -			group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
> -			group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
> +			}
> +			__group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
> +			__group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);

So we are skipping security_task_kill() and audit_signal_info().
I don't claim this is bad, I just don't know.

> @@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct *
>  	 */
>  	if (!current->signal->leader || current->signal->tty)
>  		return -EPERM;
> +	mutex_lock(&tty_mutex);

This is still racy (consider 2 threads doing tiocsctty() at the same time),
probably it is better to take tty_mutex before the check?

> --- linux-2.6.18.noarch.orig/include/linux/tty.h
> +++ linux-2.6.18.noarch/include/linux/tty.h
> @@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt
>  	return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index;
>  }
>  
> +static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty)
> +{
> +	spin_lock_irq(&p->sighand->siglock);
> +	p->signal->tty = tty;
> +	spin_unlock_irq(&p->sighand->siglock);
> +}

Note that it is always called with tty == NULL parameter. That is why
I proposed proc_clear_tty(struct task_struct *p). We can't use this
helper for tiocsctty/tty_open anyway.

> +static inline void session_clear_tty(pid_t session)
> +{
> +	struct task_struct *p;
> +	do_each_task_pid(session, PIDTYPE_SID, p) {
> +		proc_set_tty(p, NULL);
> +	} while_each_task_pid(session, PIDTYPE_SID, p);
> +}
> +

I'd suggest to move it to tty_io.c and make it static (not inline).

> ===================================================================
> --- linux-2.6.18.noarch.orig/security/selinux/hooks.c
> +++ linux-2.6.18.noarch/security/selinux/hooks.c
> @@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi
>  	struct tty_struct *tty;
>  	struct fdtable *fdt;
>  	long j = -1;
> +	int drop_tty = 0;
>  
>  	mutex_lock(&tty_mutex);
> -	tty = current->signal->tty;
> +	tty = current_get_tty();
>  	if (tty) {
>  		file_list_lock();
>  		file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list);
> @@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi
>  			struct inode *inode = file->f_dentry->d_inode;
>  			if (inode_has_perm(current, inode,
>  					   FILE__READ | FILE__WRITE, NULL)) {
> -				/* Reset controlling tty. */
> -				current->signal->tty = NULL;
> -				current->signal->tty_old_pgrp = 0;
> +				drop_tty = 1;
>  			}
>  		}
>  		file_list_unlock();
> +
> +		if (drop_tty) {
> +			/* Reset controlling tty. */
> +			spin_lock_irq(&current->sighand->siglock);
> +			current->signal->tty = NULL;
> +			current->signal->tty_old_pgrp = 0;

Probably the last line should go to proc_clear_tty() ?

On the other hand, when signal->tty != NULL, ->tty_old_pgrp
should be == 0, may be it is unneeded.

In any case, I think we should use proc_set_tty() here.

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [RFC][PATCH] ->signal->tty locking
  2006-10-17 13:29       ` Alan Cox
@ 2006-10-18 17:21         ` Oleg Nesterov
  0 siblings, 0 replies; 10+ messages in thread
From: Oleg Nesterov @ 2006-10-18 17:21 UTC (permalink / raw)
  To: Alan Cox; +Cc: Peter Zijlstra, linux-kernel, Prarit Bhargava

On 10/17, Alan Cox wrote:
>
> Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov:
> > I sent a patch,
> > 	http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193
> > 
> > but it was ignored. Probably I should re-send it.
> 
> Definitely - we still see reports of tty slab scribbles

This patch can't fix anything, sorry for the confusion.

Yes, the 'if (new_sigh) {}' code in sys_unshare() is broken, but it is
never executed, because unshare_sighand() never populates new_sighp.

Probably it is better to just remove this code.

> > > Right, use tty_mutex when using the tty, use ->sighand when changing
> > > signal->tty.
> > 
> > I think that things like do_task_stat()/do_acct_process() do not need
> > global tty_mutex, they can use ->siglock.
> 
> Please keep the tty_mutex as it will protect against other stuff later.
> Once tty is a bit saner then someone brave can refcount it properly and
> that'll make it much prettier.

Oh, but it is silly to take the global tty_mutex in do_task_stat().
Why do we need it if ->siglock protects ->signal->tty ? We are only
reading the tty_struct, tty->driver can't go away ...

Oleg.


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2006-10-18 17:21 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-16  9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra
2006-10-16 13:39 ` Prarit Bhargava
2006-10-17  8:10 ` Oleg Nesterov
2006-10-17 10:17   ` Peter Zijlstra
2006-10-17 12:33     ` Oleg Nesterov
2006-10-17 13:00       ` Peter Zijlstra
2006-10-17 14:08         ` Alan Cox
2006-10-18 16:55         ` Oleg Nesterov
2006-10-17 13:29       ` Alan Cox
2006-10-18 17:21         ` Oleg Nesterov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox