All of lore.kernel.org
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	Domenico Andreoli <cavokz@gmail.com>, Willy Tarreau <w@1wt.eu>,
	Rodrigo Rubira Branco <rbranco@la.checkpoint.com>,
	Jake Edge <jake@lwn.net>, Eugene Teo <eteo@redhat.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, Miklos Szeredi <mszeredi@suse.cz>
Subject: [patch 20/49] net: unix: fix inflight counting bug in garbage collector
Date: Tue, 11 Nov 2008 16:23:35 -0800	[thread overview]
Message-ID: <20081112002335.GU10989@kroah.com> (raw)
In-Reply-To: <20081112002215.GA10989@kroah.com>

[-- Attachment #1: net-unix-fix-inflight-counting-bug-in-garbage-collector.patch --]
[-- Type: text/plain, Size: 6500 bytes --]

2.6.27-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Miklos Szeredi <mszeredi@suse.cz>

commit 6209344f5a3795d34b7f2c0061f49802283b6bdd upstream

Previously I assumed that the receive queues of candidates don't
change during the GC.  This is only half true, nothing can be received
from the queues (see comment in unix_gc()), but buffers could be added
through the other half of the socket pair, which may still have file
descriptors referring to it.

This can result in inc_inflight_move_tail() erronously increasing the
"inflight" counter for a unix socket for which dec_inflight() wasn't
previously called.  This in turn can trigger the "BUG_ON(total_refs <
inflight_refs)" in a later garbage collection run.

Fix this by only manipulating the "inflight" counter for sockets which
are candidates themselves.  Duplicating the file references in
unix_attach_fds() is also needed to prevent a socket becoming a
candidate for GC while the skb that contains it is not yet queued.

Reported-by: Andrea Bittau <a.bittau@cs.ucl.ac.uk>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 include/net/af_unix.h |    1 +
 net/unix/af_unix.c    |   31 ++++++++++++++++++++++++-------
 net/unix/garbage.c    |   49 +++++++++++++++++++++++++++++++++++++------------
 3 files changed, 62 insertions(+), 19 deletions(-)

--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -54,6 +54,7 @@ struct unix_sock {
         atomic_long_t           inflight;
         spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
+	unsigned int		gc_maybe_cycle : 1;
         wait_queue_head_t       peer_wait;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1300,14 +1300,23 @@ static void unix_destruct_fds(struct sk_
 	sock_wfree(skb);
 }
 
-static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
+static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
 {
 	int i;
+
+	/*
+	 * Need to duplicate file references for the sake of garbage
+	 * collection.  Otherwise a socket in the fps might become a
+	 * candidate for GC while the skb is not yet queued.
+	 */
+	UNIXCB(skb).fp = scm_fp_dup(scm->fp);
+	if (!UNIXCB(skb).fp)
+		return -ENOMEM;
+
 	for (i=scm->fp->count-1; i>=0; i--)
 		unix_inflight(scm->fp->fp[i]);
-	UNIXCB(skb).fp = scm->fp;
 	skb->destructor = unix_destruct_fds;
-	scm->fp = NULL;
+	return 0;
 }
 
 /*
@@ -1366,8 +1375,11 @@ static int unix_dgram_sendmsg(struct kio
 		goto out;
 
 	memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-	if (siocb->scm->fp)
-		unix_attach_fds(siocb->scm, skb);
+	if (siocb->scm->fp) {
+		err = unix_attach_fds(siocb->scm, skb);
+		if (err)
+			goto out_free;
+	}
 	unix_get_secdata(siocb->scm, skb);
 
 	skb_reset_transport_header(skb);
@@ -1536,8 +1548,13 @@ static int unix_stream_sendmsg(struct ki
 		size = min_t(int, size, skb_tailroom(skb));
 
 		memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
-		if (siocb->scm->fp)
-			unix_attach_fds(siocb->scm, skb);
+		if (siocb->scm->fp) {
+			err = unix_attach_fds(siocb->scm, skb);
+			if (err) {
+				kfree_skb(skb);
+				goto out_err;
+			}
+		}
 
 		if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
 			kfree_skb(skb);
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -186,8 +186,17 @@ static void scan_inflight(struct sock *x
 				 */
 				struct sock *sk = unix_get_socket(*fp++);
 				if (sk) {
-					hit = true;
-					func(unix_sk(sk));
+					struct unix_sock *u = unix_sk(sk);
+
+					/*
+					 * Ignore non-candidates, they could
+					 * have been added to the queues after
+					 * starting the garbage collection
+					 */
+					if (u->gc_candidate) {
+						hit = true;
+						func(u);
+					}
 				}
 			}
 			if (hit && hitlist != NULL) {
@@ -249,11 +258,11 @@ static void inc_inflight_move_tail(struc
 {
 	atomic_long_inc(&u->inflight);
 	/*
-	 * If this is still a candidate, move it to the end of the
-	 * list, so that it's checked even if it was already passed
-	 * over
+	 * If this still might be part of a cycle, move it to the end
+	 * of the list, so that it's checked even if it was already
+	 * passed over
 	 */
-	if (u->gc_candidate)
+	if (u->gc_maybe_cycle)
 		list_move_tail(&u->link, &gc_candidates);
 }
 
@@ -267,6 +276,7 @@ void unix_gc(void)
 	struct unix_sock *next;
 	struct sk_buff_head hitlist;
 	struct list_head cursor;
+	LIST_HEAD(not_cycle_list);
 
 	spin_lock(&unix_gc_lock);
 
@@ -282,10 +292,14 @@ void unix_gc(void)
 	 *
 	 * Holding unix_gc_lock will protect these candidates from
 	 * being detached, and hence from gaining an external
-	 * reference.  This also means, that since there are no
-	 * possible receivers, the receive queues of these sockets are
-	 * static during the GC, even though the dequeue is done
-	 * before the detach without atomicity guarantees.
+	 * reference.  Since there are no possible receivers, all
+	 * buffers currently on the candidates' queues stay there
+	 * during the garbage collection.
+	 *
+	 * We also know that no new candidate can be added onto the
+	 * receive queues.  Other, non candidate sockets _can_ be
+	 * added to queue, so we must make sure only to touch
+	 * candidates.
 	 */
 	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
 		long total_refs;
@@ -299,6 +313,7 @@ void unix_gc(void)
 		if (total_refs == inflight_refs) {
 			list_move_tail(&u->link, &gc_candidates);
 			u->gc_candidate = 1;
+			u->gc_maybe_cycle = 1;
 		}
 	}
 
@@ -325,14 +340,24 @@ void unix_gc(void)
 		list_move(&cursor, &u->link);
 
 		if (atomic_long_read(&u->inflight) > 0) {
-			list_move_tail(&u->link, &gc_inflight_list);
-			u->gc_candidate = 0;
+			list_move_tail(&u->link, &not_cycle_list);
+			u->gc_maybe_cycle = 0;
 			scan_children(&u->sk, inc_inflight_move_tail, NULL);
 		}
 	}
 	list_del(&cursor);
 
 	/*
+	 * not_cycle_list contains those sockets which do not make up a
+	 * cycle.  Restore these to the inflight list.
+	 */
+	while (!list_empty(&not_cycle_list)) {
+		u = list_entry(not_cycle_list.next, struct unix_sock, link);
+		u->gc_candidate = 0;
+		list_move_tail(&u->link, &gc_inflight_list);
+	}
+
+	/*
 	 * Now gc_candidates contains only garbage.  Restore original
 	 * inflight counters for these as well, and remove the skbuffs
 	 * which are creating the cycle(s).

-- 

  parent reply	other threads:[~2008-11-12  0:33 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20081112001401.926965113@mini.kroah.org>
2008-11-12  0:22 ` [patch 00/49] 2.6.27.5 stable review Greg KH
2008-11-12  0:22   ` [patch 01/49] ext3: wait on all pending commits in ext3_sync_fs Greg KH
2008-11-12  0:22     ` Greg KH
2008-11-12  0:22   ` [patch 02/49] x86: add DMI quirk for AMI BIOS which corrupts address 0xc000 during resume Greg KH
2008-11-12  0:22   ` [patch 03/49] x86: reserve low 64K on AMI and Phoenix BIOS boxen Greg KH
2008-11-12  0:22   ` [patch 04/49] x86: add X86_RESERVE_LOW_64K Greg KH
2008-11-12  0:23   ` [patch 05/49] x86: fix CONFIG_X86_RESERVE_LOW_64K=y Greg KH
2008-11-12  0:23   ` [patch 06/49] x86: fix macro with bad_bios_dmi_table Greg KH
2008-11-12  0:23   ` [patch 07/49] cgroups: fix invalid cgrp->dentry before cgroup has been completely removed Greg KH
2008-11-12  0:23   ` [patch 08/49] hugetlb: pull gigantic page initialisation out of the default path Greg KH
2008-11-12  0:23   ` [patch 09/49] hugetlbfs: handle pages higher order than MAX_ORDER Greg KH
2008-11-12  0:23   ` [patch 10/49] cciss: fix regression firmware not displayed in procfs Greg KH
2008-11-12  0:23   ` [patch 11/49] cciss: fix sysfs broken symlink regression Greg KH
2008-11-12  0:23   ` [patch 12/49] cciss: new hardware support Greg KH
2008-11-12  0:23   ` [patch 13/49] md: linear: Fix a division by zero bug for very small arrays Greg KH
2008-11-12  0:23   ` [patch 14/49] md: fix bug in raid10 recovery Greg KH
2008-11-12  0:23   ` [patch 15/49] JFFS2: fix race condition in jffs2_lzo_compress() Greg KH
2008-11-12  0:23   ` [patch 16/49] JFFS2: Fix lack of locking in thread_should_wake() Greg KH
2008-11-12  0:23   ` [patch 17/49] ARM: xsc3: fix xsc3_l2_inv_range Greg KH
2008-11-12  0:23   ` [patch 18/49] MTD: Fix cfi_send_gen_cmd handling of x16 devices in x8 mode (v4) Greg KH
2008-11-12  0:23   ` [patch 19/49] x86: dont use tsc_khz to calculate lpj if notsc is passed Greg KH
2008-11-12  0:23   ` Greg KH [this message]
2008-11-12  0:23   ` [patch 21/49] r8169: get ethtool settings through the generic mii helper Greg KH
2008-11-12  0:23   ` [patch 22/49] r8169: fix RxMissed register access Greg KH
2008-11-12  0:23   ` [patch 23/49] r8169: wake up the PHY of the 8168 Greg KH
2008-11-12  0:23   ` [patch 24/49] I/OAT: fix channel resources free for not allocated channels Greg KH
2008-11-12  0:23   ` [patch 25/49] I/OAT: fix dma_pin_iovec_pages() error handling Greg KH
2008-11-12  0:23   ` [patch 26/49] I/OAT: fix async_tx.callback checking Greg KH
2008-11-12  0:23   ` [patch 27/49] dca: fixup initialization dependency Greg KH
2008-11-12  0:23   ` [patch 28/49] iwlwifi: allow consecutive scans in unassociated state Greg KH
2008-11-12  0:23   ` [patch 29/49] iwlwifi: allow association on radar channel in power save Greg KH
2008-11-12  0:23   ` [patch 30/49] iwlwifi: remove HT flags from RXON when not in HT anymore Greg KH
2008-11-12  0:23   ` [patch 31/49] iwlwifi: dont fail if scan is issued too early Greg KH
2008-11-12  0:24   ` [patch 32/49] iwlwifi: use correct DMA_MASK Greg KH
2008-11-12  0:24   ` [patch 33/49] iwlwifi: fix suspend to RAM in iwlwifi Greg KH
2008-11-12  0:24   ` [patch 34/49] iwlwifi: generic init calibrations framework Greg KH
2008-11-12  0:24   ` [patch 35/49] zd1211rw: Add 2 device IDs Greg KH
2008-11-12  0:24   ` [patch 36/49] iwl3945: fix deadlock on suspend Greg KH
2008-11-12  0:24   ` [patch 37/49] iwl3945: do not send scan command if channel count zero Greg KH
2008-11-12  0:24   ` [patch 38/49] cpqarry: fix return value of cpqarray_init() Greg KH
2008-11-12  0:24   ` [patch 39/49] ACPI: dock: avoid check _STA method Greg KH
2008-11-12  0:24   ` [patch 40/49] ARM: 5300/1: fixup spitz reset during boot Greg KH
2008-11-12  0:24   ` [patch 41/49] KEYS: Make request key instantiate the per-user keyrings Greg KH
2008-11-12  0:24   ` [patch 42/49] libata: fix last_reset timestamp handling Greg KH
2008-11-12  0:24   ` [patch 43/49] ALSA: hda: make a STAC_DELL_EQ option Greg KH
2008-11-12  0:24   ` [patch 44/49] Fix __pfn_to_page(pfn) for CONFIG_DISCONTIGMEM=y Greg KH
2008-11-12  0:24   ` [patch 45/49] mmc: increase SD write timeout for crappy cards Greg KH
2008-11-12  0:24   ` [patch 46/49] hfsplus: fix Buffer overflow with a corrupted image (CVE-2008-4933) Greg KH
2008-11-12  0:24   ` [patch 47/49] hfsplus: check read_mapping_page() return value (CVE-2008-4934) Greg KH
2008-11-12  0:24   ` [patch 48/49] hfs: fix namelength memory corruption (CVE-2008-5025) Greg KH
2008-11-12  0:24   ` [patch 49/49] HID: fix incorrent length condition in hidraw_write() Greg KH
2008-11-12  0:44   ` [patch 00/49] 2.6.27.5 stable review Gabriel C
2008-11-12  1:07     ` Greg KH
2008-11-12  0:54   ` Willy Tarreau
2008-11-12 14:08   ` Frans Pop
2008-11-12 17:03     ` [stable] " Greg KH
2008-11-13 22:07     ` Greg KH

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20081112002335.GU10989@kroah.com \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=cavokz@gmail.com \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=eteo@redhat.com \
    --cc=jake@lwn.net \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mkrufky@linuxtv.org \
    --cc=mszeredi@suse.cz \
    --cc=rbranco@la.checkpoint.com \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=w@1wt.eu \
    --cc=zwane@arm.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.