* [RFC][PATCH] ->signal->tty locking
@ 2006-10-16 9:53 Peter Zijlstra
2006-10-16 13:39 ` Prarit Bhargava
2006-10-17 8:10 ` Oleg Nesterov
0 siblings, 2 replies; 10+ messages in thread
From: Peter Zijlstra @ 2006-10-16 9:53 UTC (permalink / raw)
To: linux-kernel; +Cc: Prarit Bhargava, Oleg Nesterov, Alan Cox
Oleg wrote:
"Historically ->signal/->sighand (both ptrs and their contents) were globally
protected by tasklist_lock. 'current' can use these pointers lockless, they
can't be changed under him.
Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock.
Unless you are current, you can't lock ->siglock directly (without holding
tasklist_lock), you should use lock_task_sighand()."
Then, to be consistent with the rest of the kernel, ->signal->tty
locking should look like so:
mutex_lock(&tty_mutex)
read_lock(&tasklist_lock)
lock_task_sighand(p, &flags)
However, lock_task_sighand(), is a conditional lock, p might not have a
->sighand, in which case it returns NULL. What would that mean for
->signal, can I then still modify it?
struct sighand_struct *sighand = lock_task_sighand(p, &flags);
if (sighand) {
/* modify/use ->signal->tty */
unlock_task_sighand(p, &flags);
} else
/* now what !? */
The same problem appears in fs/proc/array.c:do_task_stat(), there the
locking doesn't look quite right either.
Before realizing this I came this far:
---
drivers/char/tty_io.c | 86 ++++++++++++++++++++++++++++++++++++++------------
kernel/auditsc.c | 1
kernel/exit.c | 3 +
kernel/sys.c | 4 ++
4 files changed, 75 insertions(+), 19 deletions(-)
Index: linux-2.6/drivers/char/tty_io.c
===================================================================
--- linux-2.6.orig/drivers/char/tty_io.c
+++ linux-2.6/drivers/char/tty_io.c
@@ -63,6 +63,12 @@
*
* Move do_SAK() into process context. Less stack use in devfs functions.
* alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01
+ *
+ * A word on (struct task)::signal->tty locking
+ *
+ * mutex_lock(&tty_mutex)
+ * read_lock(&tasklist_lock)
+ * lock_task_sighand()
*/
#include <linux/types.h>
@@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data)
struct task_struct *p;
struct tty_ldisc *ld;
int closecount = 0, n;
+ unsigned long flags;
if (!tty)
return;
@@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data)
This should get done automatically when the port closes and
tty_release is called */
+ mutex_lock(&tty_mutex);
read_lock(&tasklist_lock);
if (tty->session > 0) {
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
if (p->signal->tty == tty)
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
if (!p->signal->leader)
continue;
group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ lock_task_sighand(p, &flags);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
}
read_unlock(&tasklist_lock);
+ mutex_unlock(&tty_mutex);
tty->flags = 0;
tty->session = 0;
@@ -1479,10 +1492,12 @@ void disassociate_ctty(int on_exit)
struct tty_struct *tty;
struct task_struct *p;
int tty_pgrp = -1;
+ unsigned long flags;
lock_kernel();
mutex_lock(&tty_mutex);
+ /* XXX is this save wrt siglock!? */
tty = current->signal->tty;
if (tty) {
tty_pgrp = tty->pgrp;
@@ -1490,9 +1505,10 @@ void disassociate_ctty(int on_exit)
if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY)
tty_vhangup(tty);
} else {
- if (current->signal->tty_old_pgrp) {
- kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit);
- kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit);
+ pid_t old_pgrp = current->signal->tty_old_pgrp;
+ if (old_pgrp) {
+ kill_pg(old_pgrp, SIGHUP, on_exit);
+ kill_pg(old_pgrp, SIGCONT, on_exit);
}
mutex_unlock(&tty_mutex);
unlock_kernel();
@@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit)
/* Must lock changes to tty_old_pgrp */
mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
current->signal->tty_old_pgrp = 0;
- tty->session = 0;
- tty->pgrp = -1;
+
+ /* It is possible that do_tty_hangup has free'd this tty */
+ if (current->signal->tty) {
+ current->signal->tty->session = 0;
+ current->signal->tty->pgrp = 0;
+ } else {
+#ifdef TTY_DEBUG_HANGUP
+ printk(KERN_DEBUG "error attempted to write to tty [0x%p]"
+ " = NULL", tty);
+#endif
+ }
+ unlock_task_sighand(current, &flags);
/* Now clear signal->tty under the lock */
read_lock(&tasklist_lock);
do_each_task_pid(current->signal->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(current->signal->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
mutex_unlock(&tty_mutex);
@@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi
read_lock(&tasklist_lock);
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
if (o_tty)
do_each_task_pid(o_tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(o_tty->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
}
@@ -2455,6 +2488,7 @@ static int tty_open(struct inode * inode
int index;
dev_t device = inode->i_rdev;
unsigned short saved_flags = filp->f_flags;
+ unsigned long flags;
nonseekable_open(inode, filp);
@@ -2466,7 +2500,9 @@ retry_open:
mutex_lock(&tty_mutex);
if (device == MKDEV(TTYAUX_MAJOR,0)) {
+ lock_task_sighand(current, &flags);
if (!current->signal->tty) {
+ unlock_task_sighand(current, &flags);
mutex_unlock(&tty_mutex);
return -ENXIO;
}
@@ -2474,6 +2510,7 @@ retry_open:
index = current->signal->tty->index;
filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
/* noctty = 1; */
+ unlock_task_sighand(current, &flags);
goto got_driver;
}
#ifdef CONFIG_VT
@@ -2546,17 +2583,21 @@ got_driver:
filp->f_op = &tty_fops;
goto retry_open;
}
+
+ mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
if (!noctty &&
current->signal->leader &&
!current->signal->tty &&
tty->session == 0) {
- task_lock(current);
current->signal->tty = tty;
- task_unlock(current);
current->signal->tty_old_pgrp = 0;
tty->session = current->signal->session;
tty->pgrp = process_group(current);
}
+ /* don't we leak tty in the else case !? */
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
}
@@ -2888,6 +2929,7 @@ static int fionbio(struct file *file, in
static int tiocsctty(struct tty_struct *tty, int arg)
{
struct task_struct *p;
+ unsigned long flags;
if (current->signal->leader &&
(current->signal->session == tty->session))
@@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct *
*/
if (!current->signal->leader || current->signal->tty)
return -EPERM;
+ mutex_lock(&tty_mutex);
if (tty->session > 0) {
/*
* This tty is already the controlling
@@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct *
read_lock(&tasklist_lock);
do_each_task_pid(tty->session, PIDTYPE_SID, p) {
+ lock_task_sighand(p, &flags);
p->signal->tty = NULL;
+ unlock_task_sighand(p, &flags);
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
read_unlock(&tasklist_lock);
- } else
+ } else {
+ mutex_unlock(&tty_mutex);
return -EPERM;
+ }
}
- mutex_lock(&tty_mutex);
- task_lock(current);
- current->signal->tty = tty;
- task_unlock(current);
- mutex_unlock(&tty_mutex);
- current->signal->tty_old_pgrp = 0;
tty->session = current->signal->session;
tty->pgrp = process_group(current);
+ lock_task_sighand(current, &flags);
+ current->signal->tty = tty;
+ current->signal->tty_old_pgrp = 0;
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
}
@@ -2936,7 +2982,7 @@ static int tiocsctty(struct tty_struct *
* Obtain the process group of the tty. If there is no process group
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/
static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -2994,7 +3040,7 @@ static int tiocspgrp(struct tty_struct *
* Obtain the session id of the tty. If there is no session
* return an error.
*
- * Locking: none. Reference to ->signal->tty is safe.
+ * Locking: none. Reference to current->signal->tty is safe.
*/
static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p)
@@ -3139,6 +3185,7 @@ int tty_ioctl(struct inode * inode, stru
void __user *p = (void __user *)arg;
int retval;
struct tty_ldisc *ld;
+ unsigned long flags;
tty = (struct tty_struct *)file->private_data;
if (tty_paranoia_check(tty, inode, "tty_ioctl"))
@@ -3213,14 +3260,15 @@ int tty_ioctl(struct inode * inode, stru
clear_bit(TTY_EXCLUSIVE, &tty->flags);
return 0;
case TIOCNOTTY:
- /* FIXME: taks lock or tty_mutex ? */
if (current->signal->tty != tty)
return -ENOTTY;
if (current->signal->leader)
disassociate_ctty(0);
- task_lock(current);
+ mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags)
current->signal->tty = NULL;
- task_unlock(current);
+ unlock_task_sighand(current, &flags);
+ mutex_unlock(&tty_mutex);
return 0;
case TIOCSCTTY:
return tiocsctty(tty, arg);
Index: linux-2.6/kernel/auditsc.c
===================================================================
--- linux-2.6.orig/kernel/auditsc.c
+++ linux-2.6/kernel/auditsc.c
@@ -823,6 +823,7 @@ static void audit_log_exit(struct audit_
context->return_code);
mutex_lock(&tty_mutex);
+ /* XXX doesn't this race like mad? */
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
Index: linux-2.6/kernel/exit.c
===================================================================
--- linux-2.6.orig/kernel/exit.c
+++ linux-2.6/kernel/exit.c
@@ -370,6 +370,7 @@ void daemonize(const char *name, ...)
va_list args;
struct fs_struct *fs;
sigset_t blocked;
+ unsigned long flags;
va_start(args, name);
vsnprintf(current->comm, sizeof(current->comm), name, args);
@@ -384,7 +385,9 @@ void daemonize(const char *name, ...)
set_special_pids(1, 1);
mutex_lock(&tty_mutex);
+ lock_task_sighand(current, &flags);
current->signal->tty = NULL;
+ unlock_task_sighand(current, &flags);
mutex_unlock(&tty_mutex);
/* Block and flush all signals */
Index: linux-2.6/kernel/sys.c
===================================================================
--- linux-2.6.orig/kernel/sys.c
+++ linux-2.6/kernel/sys.c
@@ -1483,6 +1483,7 @@ asmlinkage long sys_setsid(void)
struct task_struct *group_leader = current->group_leader;
pid_t session;
int err = -EPERM;
+ unsigned long flags;
mutex_lock(&tty_mutex);
write_lock_irq(&tasklist_lock);
@@ -1504,8 +1505,11 @@ asmlinkage long sys_setsid(void)
group_leader->signal->leader = 1;
__set_special_pids(session, session);
+ /* XXX move this lock up 2 lines? */
+ lock_task_sighand(group_leader, &flags);
group_leader->signal->tty = NULL;
group_leader->signal->tty_old_pgrp = 0;
+ unlock_task_sighand(group_leader, &flags);
err = process_group(group_leader);
out:
write_unlock_irq(&tasklist_lock);
^ permalink raw reply [flat|nested] 10+ messages in thread* Re: [RFC][PATCH] ->signal->tty locking 2006-10-16 9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra @ 2006-10-16 13:39 ` Prarit Bhargava 2006-10-17 8:10 ` Oleg Nesterov 1 sibling, 0 replies; 10+ messages in thread From: Prarit Bhargava @ 2006-10-16 13:39 UTC (permalink / raw) To: Peter Zijlstra; +Cc: linux-kernel Peter Zijlstra wrote: > Oleg wrote: > "Historically ->signal/->sighand (both ptrs and their contents) were globally > protected by tasklist_lock. 'current' can use these pointers lockless, they > can't be changed under him. > > Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock. > Unless you are current, you can't lock ->siglock directly (without holding > tasklist_lock), you should use lock_task_sighand()." > > Then, to be consistent with the rest of the kernel, ->signal->tty > locking should look like so: > > mutex_lock(&tty_mutex) > read_lock(&tasklist_lock) > lock_task_sighand(p, &flags) > It would be nice if we could clean up some of the complicated locking in this code. For example, from do_tty_hangup, * * Locking: * BKL * redirect lock for undoing redirection * file list lock for manipulating list of ttys * tty_ldisc_lock from called functions * termios_sem resetting termios data * tasklist_lock to walk task list for hangup event P. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-16 9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra 2006-10-16 13:39 ` Prarit Bhargava @ 2006-10-17 8:10 ` Oleg Nesterov 2006-10-17 10:17 ` Peter Zijlstra 1 sibling, 1 reply; 10+ messages in thread From: Oleg Nesterov @ 2006-10-17 8:10 UTC (permalink / raw) To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox On 10/16, Peter Zijlstra wrote: > > Oleg wrote: > "Historically ->signal/->sighand (both ptrs and their contents) were globally > protected by tasklist_lock. 'current' can use these pointers lockless, they > can't be changed under him. > > Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock. > Unless you are current, you can't lock ->siglock directly (without holding > tasklist_lock), you should use lock_task_sighand()." > > Then, to be consistent with the rest of the kernel, ->signal->tty > locking should look like so: > > mutex_lock(&tty_mutex) > read_lock(&tasklist_lock) > lock_task_sighand(p, &flags) I've also started similar patches, but have no time to finish it. I don't think we need tasklist_lock. I think ->sighand->siglock is enough. So do_task_stat() doesn't need to take tty_mutex at all. However, tty_mutex protects ->tty from release_dev(tty), so it is also possible to do: mutex_lock(&tty_mutex); tty = task->signal->tty; barrier(); if (tty) { // ->tty could be changed/cleared from under us, // but it can't be released while we are holding // tty_mutex do_something(tty); } ... But first I think we should kill task_lock(). This is changelog for the first patch in unfinished series: Taking task_lock() when updating/using signal->tty doesn't help because - it is used only in some random places - signal->tty is per-process, while ->alloc_lock is per-thread We can improve this if we take task_lock(task->group_leader), but I think this is not the best option and we should use sighand->siglock instead, because - task_lock() interacts badly with write_lock_irq(&tasklist_lock), sys_setsid() won't be happy. - unless we are 'current' or tasklist_lock is held, we anyway need ->siglock to access ->signal. Actually, even reading ->group_leader is not safe unless we know the task was not released. - most of signal_struct's contents is already protected by ->siglock, why ->tty isn't? > However, lock_task_sighand(), is a conditional lock, p might not have a > ->sighand, in which case it returns NULL. What would that mean for > ->signal, can I then still modify it? This means the task has already dead, it doesn't have ->signal. > struct sighand_struct *sighand = lock_task_sighand(p, &flags); > if (sighand) { > /* modify/use ->signal->tty */ > unlock_task_sighand(p, &flags); > } else > /* now what !? */ see above. > The same problem appears in fs/proc/array.c:do_task_stat(), there the > locking doesn't look quite right either. Hmm. I think do_task_stat() is fine, it does nothing when lock_task_sighand() fails. > --- linux-2.6.orig/drivers/char/tty_io.c > +++ linux-2.6/drivers/char/tty_io.c > @@ -63,6 +63,12 @@ > * > * Move do_SAK() into process context. Less stack use in devfs functions. > * alloc_tty_struct() always uses kmalloc() -- Andrew Morton <andrewm@uow.edu.eu> 17Mar01 > + * > + * A word on (struct task)::signal->tty locking > + * > + * mutex_lock(&tty_mutex) > + * read_lock(&tasklist_lock) > + * lock_task_sighand() > */ > > #include <linux/types.h> > @@ -1282,6 +1288,7 @@ static void do_tty_hangup(void *data) > struct task_struct *p; > struct tty_ldisc *ld; > int closecount = 0, n; > + unsigned long flags; > > if (!tty) > return; > @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data) > This should get done automatically when the port closes and > tty_release is called */ > > + mutex_lock(&tty_mutex); I am not sure it is needed. > read_lock(&tasklist_lock); > if (tty->session > 0) { > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > + lock_task_sighand(p, &flags); > if (p->signal->tty == tty) > p->signal->tty = NULL; > + unlock_task_sighand(p, &flags); We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock). We are holding tasklist_lock. This means that all tasks found by do_each_task_pid() have a valid ->signal/->sighand != NULL. tasklist_lock protects against release_task()->__exit_signal() and from changing ->sighand by de_thread(). > @@ -1506,14 +1522,27 @@ void disassociate_ctty(int on_exit) > > /* Must lock changes to tty_old_pgrp */ > mutex_lock(&tty_mutex); > + lock_task_sighand(current, &flags); Again, we can use spin_lock_irq(current->signal->siglock). It is safe to use current->sighand directly, it can't be freed or changed from under us. > current->signal->tty_old_pgrp = 0; > - tty->session = 0; > - tty->pgrp = -1; > + > + /* It is possible that do_tty_hangup has free'd this tty */ > + if (current->signal->tty) { > + current->signal->tty->session = 0; > + current->signal->tty->pgrp = 0; > + } else { > +#ifdef TTY_DEBUG_HANGUP > + printk(KERN_DEBUG "error attempted to write to tty [0x%p]" > + " = NULL", tty); > +#endif > + } > + unlock_task_sighand(current, &flags); > > /* Now clear signal->tty under the lock */ > read_lock(&tasklist_lock); > do_each_task_pid(current->signal->session, PIDTYPE_SID, p) { > + lock_task_sighand(p, &flags); > p->signal->tty = NULL; > + unlock_task_sighand(p, &flags); > } while_each_task_pid(current->signal->session, PIDTYPE_SID, p); > read_unlock(&tasklist_lock); > mutex_unlock(&tty_mutex); > @@ -2340,11 +2369,15 @@ static void release_dev(struct file * fi > > read_lock(&tasklist_lock); > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > + lock_task_sighand(p, &flags); > p->signal->tty = NULL; > + unlock_task_sighand(p, &flags); > } while_each_task_pid(tty->session, PIDTYPE_SID, p); > if (o_tty) > do_each_task_pid(o_tty->session, PIDTYPE_SID, p) { > + lock_task_sighand(p, &flags); > p->signal->tty = NULL; > + unlock_task_sighand(p, &flags); > } while_each_task_pid(o_tty->session, PIDTYPE_SID, p); > read_unlock(&tasklist_lock); While doing a similar changes, I introduced a couple of simple helpers: static inline void proc_clear_tty(struct task_struct *p) { spin_lock_irq(&p->sighand->siglock); p->signal->tty = NULL; spin_unlock_irq(&p->sighand->siglock); } static void session_clear_tty(pid_t session) { struct task_struct *p; do_each_task_pid(session, PIDTYPE_SID, p) { proc_clear_tty(p); } while_each_task_pid(session, PIDTYPE_SID, p); } I think it makes sense. > static int tiocsctty(struct tty_struct *tty, int arg) > { > struct task_struct *p; > + unsigned long flags; > > if (current->signal->leader && > (current->signal->session == tty->session)) > @@ -2898,6 +2940,7 @@ static int tiocsctty(struct tty_struct * > */ > if (!current->signal->leader || current->signal->tty) > return -EPERM; > + mutex_lock(&tty_mutex); > if (tty->session > 0) { > /* > * This tty is already the controlling > @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct * > > read_lock(&tasklist_lock); > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > + lock_task_sighand(p, &flags); > p->signal->tty = NULL; > + unlock_task_sighand(p, &flags); > } while_each_task_pid(tty->session, PIDTYPE_SID, p); > read_unlock(&tasklist_lock); > - } else > + } else { > + mutex_unlock(&tty_mutex); > return -EPERM; > + } > } > - mutex_lock(&tty_mutex); > - task_lock(current); > - current->signal->tty = tty; > - task_unlock(current); > - mutex_unlock(&tty_mutex); > - current->signal->tty_old_pgrp = 0; > tty->session = current->signal->session; > tty->pgrp = process_group(current); > + lock_task_sighand(current, &flags); > + current->signal->tty = tty; > + current->signal->tty_old_pgrp = 0; > + unlock_task_sighand(current, &flags); > + mutex_unlock(&tty_mutex); > return 0; > } There is a very similar code in tty_open(), probably we need another helper, proc_set_tty(). But I am not sure about locking. I think we should check ->signal->leader/->signal->tty and set ->tty in proc_set_tty() under ->siglock, this way we can remove tty_mutex from sys_setsid(). Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 8:10 ` Oleg Nesterov @ 2006-10-17 10:17 ` Peter Zijlstra 2006-10-17 12:33 ` Oleg Nesterov 0 siblings, 1 reply; 10+ messages in thread From: Peter Zijlstra @ 2006-10-17 10:17 UTC (permalink / raw) To: Oleg Nesterov; +Cc: linux-kernel, Prarit Bhargava, Alan Cox On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote: > On 10/16, Peter Zijlstra wrote: > > > > Oleg wrote: > > "Historically ->signal/->sighand (both ptrs and their contents) were globally > > protected by tasklist_lock. 'current' can use these pointers lockless, they > > can't be changed under him. > > > > Nowadays ->signal/->sighand are _also_ protected by ->sighand->siglock. > > Unless you are current, you can't lock ->siglock directly (without holding > > tasklist_lock), you should use lock_task_sighand()." > > > > Then, to be consistent with the rest of the kernel, ->signal->tty > > locking should look like so: > > > > mutex_lock(&tty_mutex) > > read_lock(&tasklist_lock) > > lock_task_sighand(p, &flags) > > I've also started similar patches, but have no time to finish it. > > I don't think we need tasklist_lock. I think ->sighand->siglock is enough. Right, sys_unshare() makes tasklist_lock meaningless wrt ->siglock. > So do_task_stat() doesn't need to take tty_mutex at all. > > However, tty_mutex protects ->tty from release_dev(tty), so it is also > possible to do: > > mutex_lock(&tty_mutex); > tty = task->signal->tty; > barrier(); > if (tty) { > // ->tty could be changed/cleared from under us, > // but it can't be released while we are holding > // tty_mutex > do_something(tty); > } > ... Nice, I think we have to convert all those callers like sys_vhangup() to this form. > > @@ -1350,20 +1357,26 @@ static void do_tty_hangup(void *data) > > This should get done automatically when the port closes and > > tty_release is called */ > > > > + mutex_lock(&tty_mutex); > > I am not sure it is needed. Right, this would only be needed when using the tty, not when changing signal->tty. > > read_lock(&tasklist_lock); > > if (tty->session > 0) { > > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > > + lock_task_sighand(p, &flags); > > if (p->signal->tty == tty) > > p->signal->tty = NULL; > > + unlock_task_sighand(p, &flags); > > We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock). > > We are holding tasklist_lock. This means that all tasks found by > do_each_task_pid() have a valid ->signal/->sighand != NULL. > tasklist_lock protects against release_task()->__exit_signal() and > from changing ->sighand by de_thread(). I think sys_unshare() spoils the game here; it changes ->sighand in midair without holding tasklist_lock. So any ->sighand but current's is fair game. Hmm, either sys_unshare() is broken in that it doesn't take the tasklist_lock or a lot of other code is broken. let us take send_sig_info() vs. sys_unshare() 1 2 read_lock(&tasklist_lock) spin_lock_irqsave(&p->sighand->siglock, flags); rcu_assign_pointer(current->sighand, new_sigh) spin_unlock_irqsave(&p->sighand->siglock, flags); read_unlock(&tasklist_lock); what happens when 2's current is 1's p.... > > @@ -2910,20 +2953,23 @@ static int tiocsctty(struct tty_struct * > > > > read_lock(&tasklist_lock); > > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > > + lock_task_sighand(p, &flags); > > p->signal->tty = NULL; > > + unlock_task_sighand(p, &flags); > > } while_each_task_pid(tty->session, PIDTYPE_SID, p); > > read_unlock(&tasklist_lock); > > - } else > > + } else { > > + mutex_unlock(&tty_mutex); > > return -EPERM; > > + } > > } > > - mutex_lock(&tty_mutex); > > - task_lock(current); > > - current->signal->tty = tty; > > - task_unlock(current); > > - mutex_unlock(&tty_mutex); > > - current->signal->tty_old_pgrp = 0; > > tty->session = current->signal->session; > > tty->pgrp = process_group(current); > > + lock_task_sighand(current, &flags); > > + current->signal->tty = tty; > > + current->signal->tty_old_pgrp = 0; > > + unlock_task_sighand(current, &flags); > > + mutex_unlock(&tty_mutex); > > return 0; > > } > > There is a very similar code in tty_open(), probably we need another > helper, proc_set_tty(). > > But I am not sure about locking. I think we should check > ->signal->leader/->signal->tty and set ->tty in proc_set_tty() > under ->siglock, this way we can remove tty_mutex from sys_setsid(). Right, use tty_mutex when using the tty, use ->sighand when changing signal->tty. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 10:17 ` Peter Zijlstra @ 2006-10-17 12:33 ` Oleg Nesterov 2006-10-17 13:00 ` Peter Zijlstra 2006-10-17 13:29 ` Alan Cox 0 siblings, 2 replies; 10+ messages in thread From: Oleg Nesterov @ 2006-10-17 12:33 UTC (permalink / raw) To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox On 10/17, Peter Zijlstra wrote: > > On Tue, 2006-10-17 at 12:10 +0400, Oleg Nesterov wrote: > > > > We don't need lock_task_sighand() here, we can use spin_lock_irq(->siglock). > > > > We are holding tasklist_lock. This means that all tasks found by > > do_each_task_pid() have a valid ->signal/->sighand != NULL. > > tasklist_lock protects against release_task()->__exit_signal() and > > from changing ->sighand by de_thread(). > > I think sys_unshare() spoils the game here; it changes ->sighand in > midair without holding tasklist_lock. So any ->sighand but current's is > fair game. > > Hmm, either sys_unshare() is broken in that it doesn't take the > tasklist_lock or a lot of other code is broken. Yes, it is broken, please look at http://marc.theaimsgroup.com/?t=114253118100003 I sent a patch, http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193 but it was ignored. Probably I should re-send it. > Right, use tty_mutex when using the tty, use ->sighand when changing > signal->tty. I think that things like do_task_stat()/do_acct_process() do not need global tty_mutex, they can use ->siglock. Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 12:33 ` Oleg Nesterov @ 2006-10-17 13:00 ` Peter Zijlstra 2006-10-17 14:08 ` Alan Cox 2006-10-18 16:55 ` Oleg Nesterov 2006-10-17 13:29 ` Alan Cox 1 sibling, 2 replies; 10+ messages in thread From: Peter Zijlstra @ 2006-10-17 13:00 UTC (permalink / raw) To: Oleg Nesterov; +Cc: linux-kernel, Prarit Bhargava, Alan Cox On Tue, 2006-10-17 at 16:33 +0400, Oleg Nesterov wrote: > On 10/17, Peter Zijlstra wrote: > > Hmm, either sys_unshare() is broken in that it doesn't take the > > tasklist_lock or a lot of other code is broken. > > Yes, it is broken, please look at Ah good :-) How about something like this; I'm still shaky on the lifetime rules of tty objects, I'm about to add a refcount and spinlock/mutex to tty_struct, this is madness.... --- arch/sparc64/solaris/misc.c | 4 - arch/um/kernel/exec.c | 7 +- drivers/char/tty_io.c | 126 ++++++++++++++++++++++++-------------------- drivers/s390/char/fs3270.c | 10 ++- fs/dquot.c | 14 ++-- fs/open.c | 1 include/linux/tty.h | 28 +++++++++ kernel/acct.c | 9 +-- kernel/auditsc.c | 2 kernel/exit.c | 4 - kernel/sys.c | 6 +- security/selinux/hooks.c | 15 +++-- 12 files changed, 141 insertions(+), 85 deletions(-) Index: linux-2.6.18.noarch/drivers/char/tty_io.c =================================================================== --- linux-2.6.18.noarch.orig/drivers/char/tty_io.c +++ linux-2.6.18.noarch/drivers/char/tty_io.c @@ -126,7 +126,7 @@ EXPORT_SYMBOL(tty_std_termios); LIST_HEAD(tty_drivers); /* linked list of tty drivers */ -/* Semaphore to protect creating and releasing a tty. This is shared with +/* Mutex to protect creating and releasing a tty. This is shared with vt.c for deeply disgusting hack reasons */ DEFINE_MUTEX(tty_mutex); EXPORT_SYMBOL(tty_mutex); @@ -616,7 +616,7 @@ EXPORT_SYMBOL_GPL(tty_prepare_flip_strin * they are not on hot paths so a little discipline won't do * any harm. * - * Locking: takes termios_sem + * Locking: takes termios_mutex */ static void tty_set_termios_ldisc(struct tty_struct *tty, int num) @@ -917,7 +917,7 @@ static void tty_ldisc_enable(struct tty_ * context. * * Locking: takes tty_ldisc_lock. - * called functions take termios_sem + * called functions take termios_mutex */ static int tty_set_ldisc(struct tty_struct *tty, int ldisc) @@ -1269,12 +1269,12 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * * Locking: * BKL - * redirect lock for undoing redirection - * file list lock for manipulating list of ttys - * tty_ldisc_lock from called functions - * termios_sem resetting termios data - * tasklist_lock to walk task list for hangup event - * + * redirect lock for undoing redirection + * file list lock for manipulating list of ttys + * tty_ldisc_lock from called functions + * termios_mutex resetting termios data + * tasklist_lock to walk task list for hangup event + * ->siglock to protect ->signal/->sighand */ static void do_tty_hangup(void *data) { @@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data) read_lock(&tasklist_lock); if (tty->session > 0) { do_each_task_pid(tty->session, PIDTYPE_SID, p) { + spin_lock_irq(&p->sighand->siglock); if (p->signal->tty == tty) p->signal->tty = NULL; - if (!p->signal->leader) + if (!p->signal->leader) { + spin_unlock_irq(&p->sighand->siglock); continue; - group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); - group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); + } + __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); + __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); if (tty->pgrp > 0) p->signal->tty_old_pgrp = tty->pgrp; + spin_unlock_irq(&p->sighand->siglock); } while_each_task_pid(tty->session, PIDTYPE_SID, p); } read_unlock(&tasklist_lock); @@ -1470,10 +1474,12 @@ EXPORT_SYMBOL(tty_hung_up_p); * The argument on_exit is set to 1 if called when a process is * exiting; it is 0 if called by the ioctl TIOCNOTTY. * - * Locking: tty_mutex is taken to protect current->signal->tty + * Locking: * BKL is taken for hysterical raisins - * Tasklist lock is taken (under tty_mutex) to walk process - * lists for the session. + * tty_mutex is taken to protect tty + * ->siglock is taken to protect ->signal/->sighand + * tasklist_lock is taken to walk process list for sessions + * ->siglock is taken to protect ->signal/->sighand */ void disassociate_ctty(int on_exit) @@ -1481,20 +1487,23 @@ void disassociate_ctty(int on_exit) struct tty_struct *tty; struct task_struct *p; int tty_pgrp = -1; + int session; lock_kernel(); mutex_lock(&tty_mutex); - tty = current->signal->tty; + tty = current_get_tty(); if (tty) { tty_pgrp = tty->pgrp; mutex_unlock(&tty_mutex); + /* XXX: here we race, there is nothing protecting tty */ if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) tty_vhangup(tty); } else { - if (current->signal->tty_old_pgrp) { - kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit); - kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit); + pid_t old_pgrp = current->signal->tty_old_pgrp; + if (old_pgrp) { + kill_pg(old_pgrp, SIGHUP, on_exit); + kill_pg(old_pgrp, SIGCONT, on_exit); } mutex_unlock(&tty_mutex); unlock_kernel(); @@ -1506,19 +1515,29 @@ void disassociate_ctty(int on_exit) kill_pg(tty_pgrp, SIGCONT, on_exit); } - /* Must lock changes to tty_old_pgrp */ - mutex_lock(&tty_mutex); + spin_lock_irq(¤t->sighand->siglock); current->signal->tty_old_pgrp = 0; - tty->session = 0; - tty->pgrp = -1; + session = current->signal->session; + spin_unlock_irq(¤t->sighand->siglock); + + mutex_lock(&tty_mutex); + /* It is possible that do_tty_hangup has free'd this tty */ + tty = current_get_tty(); + if (tty) { + tty->session = 0; + tty->pgrp = 0; + } else { +#ifdef TTY_DEBUG_HANGUP + printk(KERN_DEBUG "error attempted to write to tty [0x%p]" + " = NULL", tty); +#endif + } + mutex_unlock(&tty_mutex); /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); - do_each_task_pid(current->signal->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; - } while_each_task_pid(current->signal->session, PIDTYPE_SID, p); + session_clear_tty(session); read_unlock(&tasklist_lock); - mutex_unlock(&tty_mutex); unlock_kernel(); } @@ -2340,13 +2359,9 @@ static void release_dev(struct file * fi struct task_struct *p; read_lock(&tasklist_lock); - do_each_task_pid(tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; - } while_each_task_pid(tty->session, PIDTYPE_SID, p); + session_clear_tty(tty->session); if (o_tty) - do_each_task_pid(o_tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; - } while_each_task_pid(o_tty->session, PIDTYPE_SID, p); + session_clear_tty(o_tty->session); read_unlock(&tasklist_lock); } @@ -2467,12 +2482,13 @@ retry_open: mutex_lock(&tty_mutex); if (device == MKDEV(TTYAUX_MAJOR,0)) { - if (!current->signal->tty) { + tty = current_get_tty(); + if (!tty) { mutex_unlock(&tty_mutex); return -ENXIO; } - driver = current->signal->tty->driver; - index = current->signal->tty->index; + driver = tty->driver; + index = tty->index; filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ /* noctty = 1; */ goto got_driver; @@ -2547,17 +2563,21 @@ got_driver: filp->f_op = &tty_fops; goto retry_open; } + + mutex_lock(&tty_mutex); + spin_lock_irq(¤t->sighand->siglock); if (!noctty && current->signal->leader && !current->signal->tty && tty->session == 0) { - task_lock(current); current->signal->tty = tty; - task_unlock(current); current->signal->tty_old_pgrp = 0; tty->session = current->signal->session; tty->pgrp = process_group(current); } + /* don't we leak tty in the else case !? */ + spin_unlock_irq(¤t->sighand->siglock); + mutex_unlock(&tty_mutex); return 0; } @@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct * */ if (!current->signal->leader || current->signal->tty) return -EPERM; + mutex_lock(&tty_mutex); if (tty->session > 0) { /* * This tty is already the controlling @@ -2908,23 +2929,21 @@ static int tiocsctty(struct tty_struct * /* * Steal it away */ - read_lock(&tasklist_lock); - do_each_task_pid(tty->session, PIDTYPE_SID, p) { - p->signal->tty = NULL; - } while_each_task_pid(tty->session, PIDTYPE_SID, p); + session_clear_tty(tty->session); read_unlock(&tasklist_lock); - } else + } else { + mutex_unlock(&tty_mutex); return -EPERM; + } } - mutex_lock(&tty_mutex); - task_lock(current); - current->signal->tty = tty; - task_unlock(current); - mutex_unlock(&tty_mutex); - current->signal->tty_old_pgrp = 0; + spin_lock_irq(¤t->sighand->siglock); tty->session = current->signal->session; tty->pgrp = process_group(current); + current->signal->tty = tty; + current->signal->tty_old_pgrp = 0; + spin_unlock_irq(¤t->sighand->siglock); + mutex_unlock(&tty_mutex); return 0; } @@ -2937,7 +2956,7 @@ static int tiocsctty(struct tty_struct * * Obtain the process group of the tty. If there is no process group * return an error. * - * Locking: none. Reference to ->signal->tty is safe. + * Locking: none. Reference to current->signal->tty is safe. */ static int tiocgpgrp(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -2995,7 +3014,7 @@ static int tiocspgrp(struct tty_struct * * Obtain the session id of the tty. If there is no session * return an error. * - * Locking: none. Reference to ->signal->tty is safe. + * Locking: none. Reference to current->signal->tty is safe. */ static int tiocgsid(struct tty_struct *tty, struct tty_struct *real_tty, pid_t __user *p) @@ -3214,14 +3233,11 @@ int tty_ioctl(struct inode * inode, stru clear_bit(TTY_EXCLUSIVE, &tty->flags); return 0; case TIOCNOTTY: - /* FIXME: taks lock or tty_mutex ? */ if (current->signal->tty != tty) return -ENOTTY; if (current->signal->leader) disassociate_ctty(0); - task_lock(current); - current->signal->tty = NULL; - task_unlock(current); + proc_set_tty(current, NULL); return 0; case TIOCSCTTY: return tiocsctty(tty, arg); Index: linux-2.6.18.noarch/kernel/auditsc.c =================================================================== --- linux-2.6.18.noarch.orig/kernel/auditsc.c +++ linux-2.6.18.noarch/kernel/auditsc.c @@ -819,10 +819,12 @@ static void audit_log_exit(struct audit_ context->return_code); mutex_lock(&tty_mutex); + read_lock(&tasklist_lock); if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) tty = tsk->signal->tty->name; else tty = "(none)"; + read_unlock(&tasklist_lock); audit_log_format(ab, " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" " ppid=%d pid=%d auid=%u uid=%u gid=%u" Index: linux-2.6.18.noarch/kernel/exit.c =================================================================== --- linux-2.6.18.noarch.orig/kernel/exit.c +++ linux-2.6.18.noarch/kernel/exit.c @@ -382,9 +382,7 @@ void daemonize(const char *name, ...) exit_mm(current); set_special_pids(1, 1); - mutex_lock(&tty_mutex); - current->signal->tty = NULL; - mutex_unlock(&tty_mutex); + proc_set_tty(current, NULL); /* Block and flush all signals */ sigfillset(&blocked); Index: linux-2.6.18.noarch/kernel/sys.c =================================================================== --- linux-2.6.18.noarch.orig/kernel/sys.c +++ linux-2.6.18.noarch/kernel/sys.c @@ -1379,7 +1379,6 @@ asmlinkage long sys_setsid(void) pid_t session; int err = -EPERM; - mutex_lock(&tty_mutex); write_lock_irq(&tasklist_lock); /* Fail if I am already a session leader */ @@ -1399,12 +1398,15 @@ asmlinkage long sys_setsid(void) group_leader->signal->leader = 1; __set_special_pids(session, session); + + spin_lock(&group_leader->sighand->siglock); group_leader->signal->tty = NULL; group_leader->signal->tty_old_pgrp = 0; + spin_unlock(&group_leader->sighand->siglock); + err = process_group(group_leader); out: write_unlock_irq(&tasklist_lock); - mutex_unlock(&tty_mutex); return err; } Index: linux-2.6.18.noarch/arch/sparc64/solaris/misc.c =================================================================== --- linux-2.6.18.noarch.orig/arch/sparc64/solaris/misc.c +++ linux-2.6.18.noarch/arch/sparc64/solaris/misc.c @@ -423,9 +423,7 @@ asmlinkage int solaris_procids(int cmd, Solaris setpgrp and setsid? */ ret = sys_setpgid(0, 0); if (ret) return ret; - mutex_lock(&tty_mutex); - current->signal->tty = NULL; - mutex_unlock(&tty_mutex); + proc_set_tty(current, NULL); return process_group(current); } case 2: /* getsid */ Index: linux-2.6.18.noarch/arch/um/kernel/exec.c =================================================================== --- linux-2.6.18.noarch.orig/arch/um/kernel/exec.c +++ linux-2.6.18.noarch/arch/um/kernel/exec.c @@ -39,12 +39,13 @@ static long execve1(char *file, char __u char __user *__user *env) { long error; + struct tty_struct *tty; #ifdef CONFIG_TTY_LOG mutex_lock(&tty_mutex); - task_lock(current); /* FIXME: is this needed ? */ - log_exec(argv, current->signal->tty); - task_unlock(current); + tty = current_get_tty(); + if (tty) + log_exec(argv, tty); mutex_unlock(&tty_mutex); #endif error = do_execve(file, argv, env, ¤t->thread.regs); Index: linux-2.6.18.noarch/drivers/s390/char/fs3270.c =================================================================== --- linux-2.6.18.noarch.orig/drivers/s390/char/fs3270.c +++ linux-2.6.18.noarch/drivers/s390/char/fs3270.c @@ -425,11 +425,15 @@ fs3270_open(struct inode *inode, struct minor = iminor(filp->f_dentry->d_inode); /* Check for minor 0 multiplexer. */ if (minor == 0) { - if (!current->signal->tty) + struct tty_struct *tty; + mutex_lock(&tty_mutex); + tty = current_get_tty(); + if (!tty) return -ENODEV; - if (current->signal->tty->driver->major != IBM_TTY3270_MAJOR) + if (tty->driver->major != IBM_TTY3270_MAJOR) return -ENODEV; - minor = current->signal->tty->index + RAW3270_FIRSTMINOR; + minor = tty->index + RAW3270_FIRSTMINOR; + mutex_unlock(&tty_mutex); } /* Check if some other program is already using fullscreen mode. */ fp = (struct fs3270 *) raw3270_find_view(&fs3270_fn, minor); Index: linux-2.6.18.noarch/fs/dquot.c =================================================================== --- linux-2.6.18.noarch.orig/fs/dquot.c +++ linux-2.6.18.noarch/fs/dquot.c @@ -828,6 +828,7 @@ static inline int need_print_warning(str static void print_warning(struct dquot *dquot, const char warntype) { char *msg = NULL; + struct tty_struct *tty; int flag = (warntype == BHARDWARN || warntype == BSOFTLONGWARN) ? DQ_BLKS_B : ((warntype == IHARDWARN || warntype == ISOFTLONGWARN) ? DQ_INODES_B : 0); @@ -835,14 +836,15 @@ static void print_warning(struct dquot * return; mutex_lock(&tty_mutex); - if (!current->signal->tty) + tty = current_get_tty(); + if (!tty) goto out_lock; - tty_write_message(current->signal->tty, dquot->dq_sb->s_id); + tty_write_message(tty, dquot->dq_sb->s_id); if (warntype == ISOFTWARN || warntype == BSOFTWARN) - tty_write_message(current->signal->tty, ": warning, "); + tty_write_message(tty, ": warning, "); else - tty_write_message(current->signal->tty, ": write failed, "); - tty_write_message(current->signal->tty, quotatypes[dquot->dq_type]); + tty_write_message(tty, ": write failed, "); + tty_write_message(tty, quotatypes[dquot->dq_type]); switch (warntype) { case IHARDWARN: msg = " file limit reached.\r\n"; @@ -863,7 +865,7 @@ static void print_warning(struct dquot * msg = " block quota exceeded.\r\n"; break; } - tty_write_message(current->signal->tty, msg); + tty_write_message(tty, msg); out_lock: mutex_unlock(&tty_mutex); } Index: linux-2.6.18.noarch/fs/open.c =================================================================== --- linux-2.6.18.noarch.orig/fs/open.c +++ linux-2.6.18.noarch/fs/open.c @@ -1204,6 +1204,7 @@ EXPORT_SYMBOL(sys_close); asmlinkage long sys_vhangup(void) { if (capable(CAP_SYS_TTY_CONFIG)) { + /* XXX: this needs locking */ tty_vhangup(current->signal->tty); return 0; } Index: linux-2.6.18.noarch/include/linux/tty.h =================================================================== --- linux-2.6.18.noarch.orig/include/linux/tty.h +++ linux-2.6.18.noarch/include/linux/tty.h @@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; } +static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty) +{ + spin_lock_irq(&p->sighand->siglock); + p->signal->tty = tty; + spin_unlock_irq(&p->sighand->siglock); +} + +static inline void session_clear_tty(pid_t session) +{ + struct task_struct *p; + do_each_task_pid(session, PIDTYPE_SID, p) { + proc_set_tty(p, NULL); + } while_each_task_pid(session, PIDTYPE_SID, p); +} + +static inline void current_get_tty(void) +{ + struct tty_struct *tty; + WARN_ON_ONCE(!mutex_locked(&tty_mutex)); + tty = current->session->tty; + barrier(); + /* + * ->tty could be changed/cleared from under us, but it can't be + * released while we are holding tty_mutex + */ + return tty; +} + #endif /* __KERNEL__ */ #endif Index: linux-2.6.18.noarch/kernel/acct.c =================================================================== --- linux-2.6.18.noarch.orig/kernel/acct.c +++ linux-2.6.18.noarch/kernel/acct.c @@ -427,6 +427,7 @@ static void do_acct_process(struct file u64 elapsed; u64 run_time; struct timespec uptime; + struct tty_struct *tty; /* * First check to see if there is enough free_space to continue @@ -484,12 +485,8 @@ static void do_acct_process(struct file #endif mutex_lock(&tty_mutex); - /* FIXME: Whoever is responsible for current->signal locking needs - to use the same locking all over the kernel and document it */ - read_lock(&tasklist_lock); - ac.ac_tty = current->signal->tty ? - old_encode_dev(tty_devnum(current->signal->tty)) : 0; - read_unlock(&tasklist_lock); + tty = current_get_tty(); + ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; mutex_unlock(&tty_mutex); spin_lock_irq(¤t->sighand->siglock); Index: linux-2.6.18.noarch/security/selinux/hooks.c =================================================================== --- linux-2.6.18.noarch.orig/security/selinux/hooks.c +++ linux-2.6.18.noarch/security/selinux/hooks.c @@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi struct tty_struct *tty; struct fdtable *fdt; long j = -1; + int drop_tty = 0; mutex_lock(&tty_mutex); - tty = current->signal->tty; + tty = current_get_tty(); if (tty) { file_list_lock(); file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); @@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi struct inode *inode = file->f_dentry->d_inode; if (inode_has_perm(current, inode, FILE__READ | FILE__WRITE, NULL)) { - /* Reset controlling tty. */ - current->signal->tty = NULL; - current->signal->tty_old_pgrp = 0; + drop_tty = 1; } } file_list_unlock(); + + if (drop_tty) { + /* Reset controlling tty. */ + spin_lock_irq(¤t->sighand->siglock); + current->signal->tty = NULL; + current->signal->tty_old_pgrp = 0; + spin_unlock_irq(¤t->sighand->siglock); + } } mutex_unlock(&tty_mutex); ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 13:00 ` Peter Zijlstra @ 2006-10-17 14:08 ` Alan Cox 2006-10-18 16:55 ` Oleg Nesterov 1 sibling, 0 replies; 10+ messages in thread From: Alan Cox @ 2006-10-17 14:08 UTC (permalink / raw) To: Peter Zijlstra; +Cc: Oleg Nesterov, linux-kernel, Prarit Bhargava Ar Maw, 2006-10-17 am 15:00 +0200, ysgrifennodd Peter Zijlstra: > How about something like this; I'm still shaky on the lifetime rules of > tty objects, I'm about to add a refcount and spinlock/mutex to > tty_struct, this is madness.... It has two already. I wouldn't worry about being shaky about the lifetime rules, thats a forensics job at the moment. > + tty = current_get_tty(); Sensible way to go whichever path we use and once it returns a refcount bumped object in future it'll clean up a lot more. get_current_tty or just current_tty() would fit the naming in the kernel better I agree entirely with the path this patch is taking. Alan ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 13:00 ` Peter Zijlstra 2006-10-17 14:08 ` Alan Cox @ 2006-10-18 16:55 ` Oleg Nesterov 1 sibling, 0 replies; 10+ messages in thread From: Oleg Nesterov @ 2006-10-18 16:55 UTC (permalink / raw) To: Peter Zijlstra; +Cc: linux-kernel, Prarit Bhargava, Alan Cox On 10/17, Peter Zijlstra wrote: > > How about something like this; I'm still shaky on the lifetime rules of > tty objects, I'm about to add a refcount and spinlock/mutex to > tty_struct, this is madness.... Sorry for delay, a couple of minor nits... > static void do_tty_hangup(void *data) > { > @@ -1355,14 +1355,18 @@ static void do_tty_hangup(void *data) > read_lock(&tasklist_lock); > if (tty->session > 0) { > do_each_task_pid(tty->session, PIDTYPE_SID, p) { > + spin_lock_irq(&p->sighand->siglock); > if (p->signal->tty == tty) > p->signal->tty = NULL; > - if (!p->signal->leader) > + if (!p->signal->leader) { > + spin_unlock_irq(&p->sighand->siglock); > continue; > - group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); > - group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); > + } > + __group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p); > + __group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p); So we are skipping security_task_kill() and audit_signal_info(). I don't claim this is bad, I just don't know. > @@ -2899,6 +2919,7 @@ static int tiocsctty(struct tty_struct * > */ > if (!current->signal->leader || current->signal->tty) > return -EPERM; > + mutex_lock(&tty_mutex); This is still racy (consider 2 threads doing tiocsctty() at the same time), probably it is better to take tty_mutex before the check? > --- linux-2.6.18.noarch.orig/include/linux/tty.h > +++ linux-2.6.18.noarch/include/linux/tty.h > @@ -338,5 +338,33 @@ static inline dev_t tty_devnum(struct tt > return MKDEV(tty->driver->major, tty->driver->minor_start) + tty->index; > } > > +static inline void proc_set_tty(struct task_struct *p, struct tty_struct *tty) > +{ > + spin_lock_irq(&p->sighand->siglock); > + p->signal->tty = tty; > + spin_unlock_irq(&p->sighand->siglock); > +} Note that it is always called with tty == NULL parameter. That is why I proposed proc_clear_tty(struct task_struct *p). We can't use this helper for tiocsctty/tty_open anyway. > +static inline void session_clear_tty(pid_t session) > +{ > + struct task_struct *p; > + do_each_task_pid(session, PIDTYPE_SID, p) { > + proc_set_tty(p, NULL); > + } while_each_task_pid(session, PIDTYPE_SID, p); > +} > + I'd suggest to move it to tty_io.c and make it static (not inline). > =================================================================== > --- linux-2.6.18.noarch.orig/security/selinux/hooks.c > +++ linux-2.6.18.noarch/security/selinux/hooks.c > @@ -1708,9 +1708,10 @@ static inline void flush_unauthorized_fi > struct tty_struct *tty; > struct fdtable *fdt; > long j = -1; > + int drop_tty = 0; > > mutex_lock(&tty_mutex); > - tty = current->signal->tty; > + tty = current_get_tty(); > if (tty) { > file_list_lock(); > file = list_entry(tty->tty_files.next, typeof(*file), f_u.fu_list); > @@ -1723,12 +1724,18 @@ static inline void flush_unauthorized_fi > struct inode *inode = file->f_dentry->d_inode; > if (inode_has_perm(current, inode, > FILE__READ | FILE__WRITE, NULL)) { > - /* Reset controlling tty. */ > - current->signal->tty = NULL; > - current->signal->tty_old_pgrp = 0; > + drop_tty = 1; > } > } > file_list_unlock(); > + > + if (drop_tty) { > + /* Reset controlling tty. */ > + spin_lock_irq(¤t->sighand->siglock); > + current->signal->tty = NULL; > + current->signal->tty_old_pgrp = 0; Probably the last line should go to proc_clear_tty() ? On the other hand, when signal->tty != NULL, ->tty_old_pgrp should be == 0, may be it is unneeded. In any case, I think we should use proc_set_tty() here. Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 12:33 ` Oleg Nesterov 2006-10-17 13:00 ` Peter Zijlstra @ 2006-10-17 13:29 ` Alan Cox 2006-10-18 17:21 ` Oleg Nesterov 1 sibling, 1 reply; 10+ messages in thread From: Alan Cox @ 2006-10-17 13:29 UTC (permalink / raw) To: Oleg Nesterov; +Cc: Peter Zijlstra, linux-kernel, Prarit Bhargava Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov: > I sent a patch, > http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193 > > but it was ignored. Probably I should re-send it. Definitely - we still see reports of tty slab scribbles > > Right, use tty_mutex when using the tty, use ->sighand when changing > > signal->tty. > > I think that things like do_task_stat()/do_acct_process() do not need > global tty_mutex, they can use ->siglock. Please keep the tty_mutex as it will protect against other stuff later. Once tty is a bit saner then someone brave can refcount it properly and that'll make it much prettier. ^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [RFC][PATCH] ->signal->tty locking 2006-10-17 13:29 ` Alan Cox @ 2006-10-18 17:21 ` Oleg Nesterov 0 siblings, 0 replies; 10+ messages in thread From: Oleg Nesterov @ 2006-10-18 17:21 UTC (permalink / raw) To: Alan Cox; +Cc: Peter Zijlstra, linux-kernel, Prarit Bhargava On 10/17, Alan Cox wrote: > > Ar Maw, 2006-10-17 am 16:33 +0400, ysgrifennodd Oleg Nesterov: > > I sent a patch, > > http://marc.theaimsgroup.com/?l=linux-kernel&m=114268787415193 > > > > but it was ignored. Probably I should re-send it. > > Definitely - we still see reports of tty slab scribbles This patch can't fix anything, sorry for the confusion. Yes, the 'if (new_sigh) {}' code in sys_unshare() is broken, but it is never executed, because unshare_sighand() never populates new_sighp. Probably it is better to just remove this code. > > > Right, use tty_mutex when using the tty, use ->sighand when changing > > > signal->tty. > > > > I think that things like do_task_stat()/do_acct_process() do not need > > global tty_mutex, they can use ->siglock. > > Please keep the tty_mutex as it will protect against other stuff later. > Once tty is a bit saner then someone brave can refcount it properly and > that'll make it much prettier. Oh, but it is silly to take the global tty_mutex in do_task_stat(). Why do we need it if ->siglock protects ->signal->tty ? We are only reading the tty_struct, tty->driver can't go away ... Oleg. ^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2006-10-18 17:21 UTC | newest] Thread overview: 10+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-10-16 9:53 [RFC][PATCH] ->signal->tty locking Peter Zijlstra 2006-10-16 13:39 ` Prarit Bhargava 2006-10-17 8:10 ` Oleg Nesterov 2006-10-17 10:17 ` Peter Zijlstra 2006-10-17 12:33 ` Oleg Nesterov 2006-10-17 13:00 ` Peter Zijlstra 2006-10-17 14:08 ` Alan Cox 2006-10-18 16:55 ` Oleg Nesterov 2006-10-17 13:29 ` Alan Cox 2006-10-18 17:21 ` Oleg Nesterov
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox