All of lore.kernel.org
 help / color / mirror / Atom feed
From: Rainer Weikusat <rweikusat@mobileactivedefense.com>
To: Jason Baron <jbaron@akamai.com>
Cc: Rainer Weikusat <rweikusat@mobileactivedefense.com>,
	davem@davemloft.net, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, minipli@googlemail.com,
	normalperson@yhbt.net, eric.dumazet@gmail.com,
	viro@zeniv.linux.org.uk, davidel@xmailserver.org,
	dave@stgolabs.net, olivier@mauras.ch, pageexec@freemail.hu,
	torvalds@linux-foundation.org, peterz@infradead.org
Subject: Re: [RFC] unix: fix use-after-free in unix_dgram_poll()/ 4.2.5
Date: Fri, 30 Oct 2015 20:52:08 +0000	[thread overview]
Message-ID: <877fm43wqv.fsf_-_@doppelsaurus.mobileactivedefense.com> (raw)
In-Reply-To: <874mhbx7o1.fsf_-_@doppelsaurus.mobileactivedefense.com> (Rainer Weikusat's message of "Wed, 28 Oct 2015 16:46:38 +0000")

Same changes ported to 4.2.5 with some minor improvments (I hope),
namely,

	- applied a round of DeMorgan to the 'quick' check function in
          order to simplify the condition

	- fixed a (minor) error in the dgram_sendmsg change: In case the
          2nd check resulted in 'can send now', the code would continue
          with 'wait until timeout expired' (since timeo was 0 in the
          case, this shouldn't make much of a practical difference)

	- (hopefully) more intelligible function names and better
          explanation

	- dropped the POLL_OUT_ALL macro again as that's really
          unrelated

---
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -62,6 +62,7 @@ struct unix_sock {
 #define UNIX_GC_CANDIDATE	0
 #define UNIX_GC_MAYBE_CYCLE	1
 	struct socket_wq	peer_wq;
+	wait_queue_t		peer_wake;
 };
 
 static inline struct unix_sock *unix_sk(struct sock *sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -326,6 +326,122 @@ found:
 	return s;
 }
 
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+				      void *key)
+{
+	struct unix_sock *u;
+	wait_queue_head_t *u_sleep;
+
+	u = container_of(q, struct unix_sock, peer_wake);
+
+	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+			    &u->peer_wake);
+	u->peer_wake.private = NULL;
+
+	/* relaying can only happen while the wq still exists */
+	u_sleep = sk_sleep(&u->sk);
+	if (u_sleep)
+		wake_up_interruptible_poll(u_sleep, key);
+
+	return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (!u->peer_wake.private) {
+		u->peer_wake.private = other;
+		__add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+static int unix_dgram_peer_wake_disconnect(struct sock *sk, struct sock *other)
+{
+	struct unix_sock *u, *u_other;
+	int rc;
+
+	u = unix_sk(sk);
+	u_other = unix_sk(other);
+	rc = 0;
+
+	spin_lock(&u_other->peer_wait.lock);
+
+	if (u->peer_wake.private == other) {
+		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+		u->peer_wake.private = NULL;
+
+		rc = 1;
+	}
+
+	spin_unlock(&u_other->peer_wait.lock);
+	return rc;
+}
+
+/* Needs 'this' unix state lock. Lockless check if data can (likely)
+ * be sent.
+ */
+static inline int unix_dgram_peer_recv_ready(struct sock *sk,
+					     struct sock *other)
+{
+	return unix_peer(other) == sk || !unix_recvq_full(other);
+}
+
+/* Needs 'this' unix state lock. After recv_ready indicated not ready,
+ * establish peer_wait connection if still needed.
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+	int conned;
+
+	conned = unix_dgram_peer_wake_connect(sk, other);
+
+	if (unix_recvq_full(other))
+		return 1;
+
+	if (conned)
+		unix_dgram_peer_wake_disconnect(sk, other);
+
+	return 0;
+}
+
 static inline int unix_writable(struct sock *sk)
 {
 	return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
@@ -430,6 +546,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
 			skpair->sk_state_change(skpair);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 		}
+
+		unix_dgram_peer_wake_disconnect(sk, skpair);
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
 	}
@@ -664,6 +782,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 	INIT_LIST_HEAD(&u->link);
 	mutex_init(&u->readlock); /* single task reading lock */
 	init_waitqueue_head(&u->peer_wait);
+	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 	unix_insert_socket(unix_sockets_unbound(sk), sk);
 out:
 	if (sk == NULL)
@@ -983,7 +1102,7 @@ static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
 	struct sock *other;
 	unsigned int hash;
-	int err;
+	int err, disconned;
 
 	if (addr->sa_family != AF_UNSPEC) {
 		err = unix_mkname(sunaddr, alen, &hash);
@@ -1031,6 +1150,14 @@ restart:
 	if (unix_peer(sk)) {
 		struct sock *old_peer = unix_peer(sk);
 		unix_peer(sk) = other;
+
+		disconned = unix_dgram_peer_wake_disconnect(sk, other);
+		if (disconned)
+			wake_up_interruptible_poll(sk_sleep(sk),
+						   POLLOUT |
+						   POLLWRNORM |
+						   POLLWRBAND);
+
 		unix_state_double_unlock(sk, other);
 
 		if (other != old_peer)
@@ -1463,7 +1590,7 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
 	DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
 	struct sock *other = NULL;
 	int namelen = 0; /* fake GCC */
-	int err;
+	int err, disconned;
 	unsigned int hash;
 	struct sk_buff *skb;
 	long timeo;
@@ -1565,6 +1692,14 @@ restart:
 		unix_state_lock(sk);
 		if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
+
+			disconned = unix_dgram_peer_wake_disconnect(sk, other);
+			if (disconned)
+				wake_up_interruptible_poll(sk_sleep(sk),
+							   POLLOUT |
+							   POLLWRNORM |
+							   POLLWRBAND);
+
 			unix_state_unlock(sk);
 
 			unix_dgram_disconnected(sk, other);
@@ -1590,10 +1725,14 @@ restart:
 			goto out_unlock;
 	}
 
-	if (unix_peer(other) != sk && unix_recvq_full(other)) {
+	if (!unix_dgram_peer_recv_ready(sk, other)) {
 		if (!timeo) {
-			err = -EAGAIN;
-			goto out_unlock;
+			if (unix_dgram_peer_wake_me(sk, other)) {
+				err = -EAGAIN;
+				goto out_unlock;
+			}
+
+			goto restart;
 		}
 
 		timeo = unix_wait_for_peer(other, timeo);
@@ -2453,14 +2592,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 		return mask;
 
 	writable = unix_writable(sk);
-	other = unix_peer_get(sk);
-	if (other) {
-		if (unix_peer(other) != sk) {
-			sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
-			if (unix_recvq_full(other))
-				writable = 0;
-		}
-		sock_put(other);
+	if (writable) {
+		unix_state_lock(sk);
+
+		other = unix_peer(sk);
+		if (other &&
+		    !unix_dgram_peer_recv_ready(sk, other) &&
+		    unix_dgram_peer_wake_me(sk, other))
+			writable = 0;
+
+		unix_state_unlock(sk);
 	}
 
 	if (writable)

  parent reply	other threads:[~2015-10-30 20:53 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-02 20:43 [PATCH v2 0/3] af_unix: fix use-after-free Jason Baron
2015-10-02 20:43 ` [PATCH v2 1/3] unix: fix use-after-free in unix_dgram_poll() Jason Baron
2015-10-03  5:46   ` Mathias Krause
2015-10-03 17:02     ` Rainer Weikusat
2015-10-04 17:41       ` Rainer Weikusat
2015-10-05 16:31   ` Rainer Weikusat
2015-10-05 16:54     ` Eric Dumazet
2015-10-05 17:20       ` Rainer Weikusat
2015-10-05 17:55     ` Jason Baron
2015-10-12 20:41       ` Rainer Weikusat
2015-10-14  3:44         ` Jason Baron
2015-10-14 17:47           ` Rainer Weikusat
2015-10-15  2:54             ` Jason Baron
2015-10-18 20:58               ` Rainer Weikusat
2015-10-19 15:07                 ` Jason Baron
2015-10-20 22:29                   ` Rainer Weikusat
2015-10-21 17:34                     ` Rainer Weikusat
2015-10-28 16:46                     ` [RFC] " Rainer Weikusat
2015-10-28 17:57                       ` Jason Baron
2015-10-29 14:23                         ` Rainer Weikusat
2015-10-30 20:52                       ` Rainer Weikusat [this message]
     [not found]                         ` <57d2f5b6aae251957bff7a1a52b8bf2c@core-hosting.net>
2015-11-02 21:55                           ` [RFC] unix: fix use-after-free in unix_dgram_poll()/ 4.2.5 Rainer Weikusat
2015-10-02 20:43 ` [PATCH v2 2/3] af_unix: Convert gc_flags to flags Jason Baron
2015-10-02 20:44 ` [PATCH v2 3/3] af_unix: optimize the unix_dgram_recvmsg() Jason Baron
2015-10-05  7:41   ` Peter Zijlstra
2015-10-05 17:13     ` Jason Baron

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=877fm43wqv.fsf_-_@doppelsaurus.mobileactivedefense.com \
    --to=rweikusat@mobileactivedefense.com \
    --cc=dave@stgolabs.net \
    --cc=davem@davemloft.net \
    --cc=davidel@xmailserver.org \
    --cc=eric.dumazet@gmail.com \
    --cc=jbaron@akamai.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=minipli@googlemail.com \
    --cc=netdev@vger.kernel.org \
    --cc=normalperson@yhbt.net \
    --cc=olivier@mauras.ch \
    --cc=pageexec@freemail.hu \
    --cc=peterz@infradead.org \
    --cc=torvalds@linux-foundation.org \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.