public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Chris Wright <chrisw@sous-sol.org>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	Domenico Andreoli <cavokz@gmail.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, David Miller <davem@davemloft.net>,
	bunk@stusta.de
Subject: [patch 30/32] Fix AF_UNIX OOPS
Date: Fri, 08 Jun 2007 00:15:41 -0700	[thread overview]
Message-ID: <20070608071557.973282000@sous-sol.org> (raw)
In-Reply-To: 20070608071511.159309000@sous-sol.org

[-- Attachment #1: fix-af_unix-oops.patch --]
[-- Type: text/plain, Size: 12822 bytes --]

-stable review patch.  If anyone has any objections, please let us know.
---------------------

From: David Miller <davem@davemloft.net>

This combines two upstream commits to fix an OOPS with
AF_UNIX and SELINUX.

basically, sk->sk_socket can become null because we access
a peer socket without any locking, so it can be shut down and
released in another thread.

Commit: d410b81b4eef2e4409f9c38ef201253fbbcc7d94
[AF_UNIX]: Make socket locking much less confusing.

The unix_state_*() locking macros imply that there is some
rwlock kind of thing going on, but the implementation is
actually a spinlock which makes the code more confusing than
it needs to be.

So use plain unix_state_lock and unix_state_unlock.

Signed-off-by: David S. Miller <davem@davemloft.net>

Commit: 19fec3e807a487415e77113cb9dbdaa2da739836
[AF_UNIX]: Fix datagram connect race causing an OOPS.

Based upon an excellent bug report and initial patch by
Frederik Deweerdt.

The UNIX datagram connect code blindly dereferences other->sk_socket
via the call down to the security_unix_may_send() function.

Without locking 'other' that pointer can go NULL via unix_release_sock()
which does sock_orphan() which also marks the socket SOCK_DEAD.

So we have to lock both 'sk' and 'other' yet avoid all kinds of
potential deadlocks (connect to self is OK for datagram sockets and it
is possible for two datagram sockets to perform a simultaneous connect
to each other).  So what we do is have a "double lock" function similar
to how we handle this situation in other areas of the kernel.  We take
the lock of the socket pointer with the smallest address first in
order to avoid ABBA style deadlocks.

Once we have them both locked, we check to see if SOCK_DEAD is set
for 'other' and if so, drop everything and retry the lookup.

Signed-off-by: David S. Miller <davem@davemloft.net>
[chrisw: backport to 2.6.20]
Signed-off-by: Chris Wright <chrisw@sous-sol.org>

---
 include/net/af_unix.h |    8 +--
 net/unix/af_unix.c    |  127 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 83 insertions(+), 52 deletions(-)

--- linux-2.6.20.13.orig/include/net/af_unix.h
+++ linux-2.6.20.13/include/net/af_unix.h
@@ -62,13 +62,11 @@ struct unix_skb_parms {
 #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
-#define unix_state_rlock(s)	spin_lock(&unix_sk(s)->lock)
-#define unix_state_runlock(s)	spin_unlock(&unix_sk(s)->lock)
-#define unix_state_wlock(s)	spin_lock(&unix_sk(s)->lock)
-#define unix_state_wlock_nested(s) \
+#define unix_state_lock(s)	spin_lock(&unix_sk(s)->lock)
+#define unix_state_unlock(s)	spin_unlock(&unix_sk(s)->lock)
+#define unix_state_lock_nested(s) \
 				spin_lock_nested(&unix_sk(s)->lock, \
 				SINGLE_DEPTH_NESTING)
-#define unix_state_wunlock(s)	spin_unlock(&unix_sk(s)->lock)
 
 #ifdef __KERNEL__
 /* The AF_UNIX socket */
--- linux-2.6.20.13.orig/net/unix/af_unix.c
+++ linux-2.6.20.13/net/unix/af_unix.c
@@ -175,11 +175,11 @@ static struct sock *unix_peer_get(struct
 {
 	struct sock *peer;
 
-	unix_state_rlock(s);
+	unix_state_lock(s);
 	peer = unix_peer(s);
 	if (peer)
 		sock_hold(peer);
-	unix_state_runlock(s);
+	unix_state_unlock(s);
 	return peer;
 }
 
@@ -370,7 +370,7 @@ static int unix_release_sock (struct soc
 	unix_remove_socket(sk);
 
 	/* Clear state */
-	unix_state_wlock(sk);
+	unix_state_lock(sk);
 	sock_orphan(sk);
 	sk->sk_shutdown = SHUTDOWN_MASK;
 	dentry	     = u->dentry;
@@ -379,7 +379,7 @@ static int unix_release_sock (struct soc
 	u->mnt	     = NULL;
 	state = sk->sk_state;
 	sk->sk_state = TCP_CLOSE;
-	unix_state_wunlock(sk);
+	unix_state_unlock(sk);
 
 	wake_up_interruptible_all(&u->peer_wait);
 
@@ -387,12 +387,12 @@ static int unix_release_sock (struct soc
 
 	if (skpair!=NULL) {
 		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
-			unix_state_wlock(skpair);
+			unix_state_lock(skpair);
 			/* No more writes */
 			skpair->sk_shutdown = SHUTDOWN_MASK;
 			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 				skpair->sk_err = ECONNRESET;
-			unix_state_wunlock(skpair);
+			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
 			read_lock(&skpair->sk_callback_lock);
 			sk_wake_async(skpair,1,POLL_HUP);
@@ -449,7 +449,7 @@ static int unix_listen(struct socket *so
 	err = -EINVAL;
 	if (!u->addr)
 		goto out;			/* No listens on an unbound socket */
-	unix_state_wlock(sk);
+	unix_state_lock(sk);
 	if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 		goto out_unlock;
 	if (backlog > sk->sk_max_ack_backlog)
@@ -463,7 +463,7 @@ static int unix_listen(struct socket *so
 	err = 0;
 
 out_unlock:
-	unix_state_wunlock(sk);
+	unix_state_unlock(sk);
 out:
 	return err;
 }
@@ -859,6 +859,31 @@ out_mknod_parent:
 	goto out_up;
 }
 
+static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
+{
+	if (unlikely(sk1 == sk2) || !sk2) {
+		unix_state_lock(sk1);
+		return;
+	}
+	if (sk1 < sk2) {
+		unix_state_lock(sk1);
+		unix_state_lock_nested(sk2);
+	} else {
+		unix_state_lock(sk2);
+		unix_state_lock_nested(sk1);
+	}
+}
+
+static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
+{
+	if (unlikely(sk1 == sk2) || !sk2) {
+		unix_state_unlock(sk1);
+		return;
+	}
+	unix_state_unlock(sk1);
+	unix_state_unlock(sk2);
+}
+
 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 			      int alen, int flags)
 {
@@ -878,11 +903,19 @@ static int unix_dgram_connect(struct soc
 		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 			goto out;
 
+restart:
 		other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
 		if (!other)
 			goto out;
 
-		unix_state_wlock(sk);
+		unix_state_double_lock(sk, other);
+
+		/* Apparently VFS overslept socket death. Retry. */
+		if (sock_flag(other, SOCK_DEAD)) {
+			unix_state_double_unlock(sk, other);
+			sock_put(other);
+			goto restart;
+		}
 
 		err = -EPERM;
 		if (!unix_may_send(sk, other))
@@ -897,7 +930,7 @@ static int unix_dgram_connect(struct soc
 		 *	1003.1g breaking connected state with AF_UNSPEC
 		 */
 		other = NULL;
-		unix_state_wlock(sk);
+		unix_state_double_lock(sk, other);
 	}
 
 	/*
@@ -906,19 +939,19 @@ static int unix_dgram_connect(struct soc
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk)=other;
-		unix_state_wunlock(sk);
+		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
 			unix_dgram_disconnected(sk, old_peer);
 		sock_put(old_peer);
 	} else {
 		unix_peer(sk)=other;
-		unix_state_wunlock(sk);
+		unix_state_double_unlock(sk, other);
 	}
  	return 0;
 
 out_unlock:
-	unix_state_wunlock(sk);
+	unix_state_double_unlock(sk, other);
 	sock_put(other);
 out:
 	return err;
@@ -937,7 +970,7 @@ static long unix_wait_for_peer(struct so
 		(skb_queue_len(&other->sk_receive_queue) >
 		 other->sk_max_ack_backlog);
 
-	unix_state_runlock(other);
+	unix_state_unlock(other);
 
 	if (sched)
 		timeo = schedule_timeout(timeo);
@@ -995,11 +1028,11 @@ restart:
 		goto out;
 
 	/* Latch state of peer */
-	unix_state_rlock(other);
+	unix_state_lock(other);
 
 	/* Apparently VFS overslept socket death. Retry. */
 	if (sock_flag(other, SOCK_DEAD)) {
-		unix_state_runlock(other);
+		unix_state_unlock(other);
 		sock_put(other);
 		goto restart;
 	}
@@ -1049,18 +1082,18 @@ restart:
 		goto out_unlock;
 	}
 
-	unix_state_wlock_nested(sk);
+	unix_state_lock_nested(sk);
 
 	if (sk->sk_state != st) {
-		unix_state_wunlock(sk);
-		unix_state_runlock(other);
+		unix_state_unlock(sk);
+		unix_state_unlock(other);
 		sock_put(other);
 		goto restart;
 	}
 
 	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
 	if (err) {
-		unix_state_wunlock(sk);
+		unix_state_unlock(sk);
 		goto out_unlock;
 	}
 
@@ -1097,7 +1130,7 @@ restart:
 	smp_mb__after_atomic_inc();	/* sock_hold() does an atomic_inc() */
 	unix_peer(sk)	= newsk;
 
-	unix_state_wunlock(sk);
+	unix_state_unlock(sk);
 
 	/* take ten and and send info to listening sock */
 	spin_lock(&other->sk_receive_queue.lock);
@@ -1106,14 +1139,14 @@ restart:
 	 * is installed to listening socket. */
 	atomic_inc(&newu->inflight);
 	spin_unlock(&other->sk_receive_queue.lock);
-	unix_state_runlock(other);
+	unix_state_unlock(other);
 	other->sk_data_ready(other, 0);
 	sock_put(other);
 	return 0;
 
 out_unlock:
 	if (other)
-		unix_state_runlock(other);
+		unix_state_unlock(other);
 
 out:
 	if (skb)
@@ -1179,10 +1212,10 @@ static int unix_accept(struct socket *so
 	wake_up_interruptible(&unix_sk(sk)->peer_wait);
 
 	/* attach accepted sock to socket */
-	unix_state_wlock(tsk);
+	unix_state_lock(tsk);
 	newsock->state = SS_CONNECTED;
 	sock_graft(tsk, newsock);
-	unix_state_wunlock(tsk);
+	unix_state_unlock(tsk);
 	return 0;
 
 out:
@@ -1209,7 +1242,7 @@ static int unix_getname(struct socket *s
 	}
 
 	u = unix_sk(sk);
-	unix_state_rlock(sk);
+	unix_state_lock(sk);
 	if (!u->addr) {
 		sunaddr->sun_family = AF_UNIX;
 		sunaddr->sun_path[0] = 0;
@@ -1220,7 +1253,7 @@ static int unix_getname(struct socket *s
 		*uaddr_len = addr->len;
 		memcpy(sunaddr, addr->name, *uaddr_len);
 	}
-	unix_state_runlock(sk);
+	unix_state_unlock(sk);
 	sock_put(sk);
 out:
 	return err;
@@ -1338,7 +1371,7 @@ restart:
 			goto out_free;
 	}
 
-	unix_state_rlock(other);
+	unix_state_lock(other);
 	err = -EPERM;
 	if (!unix_may_send(sk, other))
 		goto out_unlock;
@@ -1348,20 +1381,20 @@ restart:
 		 *	Check with 1003.1g - what should
 		 *	datagram error
 		 */
-		unix_state_runlock(other);
+		unix_state_unlock(other);
 		sock_put(other);
 
 		err = 0;
-		unix_state_wlock(sk);
+		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk)=NULL;
-			unix_state_wunlock(sk);
+			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
 			sock_put(other);
 			err = -ECONNREFUSED;
 		} else {
-			unix_state_wunlock(sk);
+			unix_state_unlock(sk);
 		}
 
 		other = NULL;
@@ -1398,14 +1431,14 @@ restart:
 	}
 
 	skb_queue_tail(&other->sk_receive_queue, skb);
-	unix_state_runlock(other);
+	unix_state_unlock(other);
 	other->sk_data_ready(other, len);
 	sock_put(other);
 	scm_destroy(siocb->scm);
 	return len;
 
 out_unlock:
-	unix_state_runlock(other);
+	unix_state_unlock(other);
 out_free:
 	kfree_skb(skb);
 out:
@@ -1495,14 +1528,14 @@ static int unix_stream_sendmsg(struct ki
 			goto out_err;
 		}
 
-		unix_state_rlock(other);
+		unix_state_lock(other);
 
 		if (sock_flag(other, SOCK_DEAD) ||
 		    (other->sk_shutdown & RCV_SHUTDOWN))
 			goto pipe_err_free;
 
 		skb_queue_tail(&other->sk_receive_queue, skb);
-		unix_state_runlock(other);
+		unix_state_unlock(other);
 		other->sk_data_ready(other, size);
 		sent+=size;
 	}
@@ -1513,7 +1546,7 @@ static int unix_stream_sendmsg(struct ki
 	return sent;
 
 pipe_err_free:
-	unix_state_runlock(other);
+	unix_state_unlock(other);
 	kfree_skb(skb);
 pipe_err:
 	if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
@@ -1642,7 +1675,7 @@ static long unix_stream_data_wait(struct
 {
 	DEFINE_WAIT(wait);
 
-	unix_state_rlock(sk);
+	unix_state_lock(sk);
 
 	for (;;) {
 		prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
@@ -1655,14 +1688,14 @@ static long unix_stream_data_wait(struct
 			break;
 
 		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
-		unix_state_runlock(sk);
+		unix_state_unlock(sk);
 		timeo = schedule_timeout(timeo);
-		unix_state_rlock(sk);
+		unix_state_lock(sk);
 		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	}
 
 	finish_wait(sk->sk_sleep, &wait);
-	unix_state_runlock(sk);
+	unix_state_unlock(sk);
 	return timeo;
 }
 
@@ -1817,12 +1850,12 @@ static int unix_shutdown(struct socket *
 	mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
 
 	if (mode) {
-		unix_state_wlock(sk);
+		unix_state_lock(sk);
 		sk->sk_shutdown |= mode;
 		other=unix_peer(sk);
 		if (other)
 			sock_hold(other);
-		unix_state_wunlock(sk);
+		unix_state_unlock(sk);
 		sk->sk_state_change(sk);
 
 		if (other &&
@@ -1834,9 +1867,9 @@ static int unix_shutdown(struct socket *
 				peer_mode |= SEND_SHUTDOWN;
 			if (mode&SEND_SHUTDOWN)
 				peer_mode |= RCV_SHUTDOWN;
-			unix_state_wlock(other);
+			unix_state_lock(other);
 			other->sk_shutdown |= peer_mode;
-			unix_state_wunlock(other);
+			unix_state_unlock(other);
 			other->sk_state_change(other);
 			read_lock(&other->sk_callback_lock);
 			if (peer_mode == SHUTDOWN_MASK)
@@ -1974,7 +2007,7 @@ static int unix_seq_show(struct seq_file
 	else {
 		struct sock *s = v;
 		struct unix_sock *u = unix_sk(s);
-		unix_state_rlock(s);
+		unix_state_lock(s);
 
 		seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
 			s,
@@ -2002,7 +2035,7 @@ static int unix_seq_show(struct seq_file
 			for ( ; i < len; i++)
 				seq_putc(seq, u->addr->name->sun_path[i]);
 		}
-		unix_state_runlock(s);
+		unix_state_unlock(s);
 		seq_putc(seq, '\n');
 	}
 

-- 

  parent reply	other threads:[~2007-06-08  7:23 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-06-08  7:15 [patch 00/32] 2.6.20-stable review Chris Wright
2007-06-08  7:15 ` [patch 01/32] pv6: track device renames in snmp6 Chris Wright
2007-06-08  7:15 ` [patch 02/32] oom: kill all threads that share mm with killed task Chris Wright
2007-06-08  7:15 ` [patch 03/32] x86-64: Always flush all pages in change_page_attr Chris Wright
2007-06-08  7:15 ` [patch 04/32] smc911x: fix compilation breakage wjen debug is on Chris Wright
2007-06-08  7:15 ` [patch 05/32] iop13xx: fix i/o address translation Chris Wright
2007-06-08  7:15 ` [patch 06/32] [NETFILTER]: {ip, nf}_nat_proto_gre: do not modify/corrupt GREv0 packets through NAT Chris Wright
2007-06-08  7:15 ` [patch 07/32] sata_via: add missing PM hooks Chris Wright
2007-06-08  7:15 ` [patch 08/32] driver-core: dont free devt_attr till the device is released Chris Wright
2007-06-08  7:15 ` [patch 09/32] JFS: Fix race waking up jfsIO kernel thread Chris Wright
2007-06-08  7:15 ` [patch 10/32] CRYPTO: api: Read module pointer before freeing algorithm Chris Wright
2007-06-08  7:15 ` [patch 11/32] fuse: fix mknod of regular file Chris Wright
2007-06-08  7:15 ` [patch 12/32] acpi-thermal: fix mod_timer() interval Chris Wright
2007-06-08  7:15 ` [patch 13/32] ALSA: usb-audio: explicitly match Logitech QuickCam Chris Wright
2007-06-08  7:15 ` [patch 14/32] s390: Fix TCP/UDP pseudo header checksum computation Chris Wright
2007-06-08  7:15 ` [patch 15/32] s390: page_mkclean data corruption Chris Wright
2007-06-08  7:15 ` [patch 16/32] V4L/DVB (5593): Budget-ci: Fix tuning for TDM 1316 (160..200 MHz) Chris Wright
2007-06-08  7:15 ` [patch 17/32] kbuild: fixdep segfault on pathological string-o-death Chris Wright
2007-06-08  7:15 ` [patch 18/32] ntfs_init_locked_inode(): fix array indexing Chris Wright
2007-06-08  7:15 ` [patch 19/32] ICMP: Fix icmp_errors_use_inbound_ifaddr sysctl Chris Wright
2007-06-08  7:15 ` [patch 20/32] NET: parse ip:port strings correctly in in4_pton Chris Wright
2007-06-08  7:15 ` [patch 21/32] IPSEC: Fix panic when using inter address familiy IPsec on loopback Chris Wright
2007-06-08  7:15 ` [patch 22/32] NET: Fix BMSR_100{HALF,FULL}2 defines in linux/mii.h Chris Wright
2007-06-08  7:15 ` [patch 23/32] IPV4: Correct rp_filter help text Chris Wright
2007-06-09  1:20   ` Herbert Xu
2007-06-09  1:22     ` Herbert Xu
2007-06-08  7:15 ` [patch 24/32] SPARC: Linux always started with 9600 8N1 Chris Wright
2007-06-08  7:15 ` [patch 25/32] NET: "wrong timeout value" in sk_wait_data() v2 Chris Wright
2007-06-08  7:15 ` [patch 26/32] SPARC64: Fix two bugs wrt. kernel 4MB TSB Chris Wright
2007-06-08  7:15 ` [patch 27/32] SPARC64: Fix _PAGE_EXEC_4U check in sun4u I-TLB miss handler Chris Wright
2007-06-08  7:15 ` [patch 28/32] TCP: Use default 32768-61000 outgoing port range in all cases Chris Wright
2007-06-08  7:15 ` [patch 29/32] NET: Fix race condition about network device name allocation Chris Wright
2007-06-08  7:15 ` Chris Wright [this message]
2007-06-08  7:15 ` [patch 31/32] IPV6 ROUTE: No longer handle ::/0 specially Chris Wright
2007-06-08  7:15 ` [patch 32/32] SPARC64: Dont be picky about virtual-dma values on sun4v Chris Wright
2007-06-08  7:29 ` [stable] [patch 00/32] 2.6.20-stable review Chris Wright
2007-06-08 16:51   ` Chris Wright
2007-06-08 12:21 ` Fortier,Vincent [Montreal]
2007-06-08 15:45   ` Chris Wright
2007-06-08 20:56     ` Chuck Ebbert

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070608071557.973282000@sous-sol.org \
    --to=chrisw@sous-sol.org \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=bunk@stusta.de \
    --cc=cavokz@gmail.com \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=davem@davemloft.net \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mkrufky@linuxtv.org \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=zwane@arm.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox