public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org,
	Andrew Morton <akpm@osdl.org>
Cc: Justin Forbes <jmforbes@linuxtx.org>,
	Zwane Mwaikambo <zwane@arm.linux.org.uk>,
	"Theodore Ts'o" <tytso@mit.edu>,
	Randy Dunlap <rdunlap@xenotime.net>,
	Dave Jones <davej@redhat.com>,
	Chuck Wolber <chuckw@quantumlinux.com>,
	Chris Wedgwood <reviews@ml.cw.f00f.org>,
	Michael Krufky <mkrufky@linuxtv.org>,
	Chuck Ebbert <cebbert@redhat.com>,
	torvalds@linux-foundation.org, akpm@linux-foundation.org,
	alan@lxorguk.ukuu.org.uk, nfs@lists.sourceforge.net,
	Neil Brown <neilb@suse.de>
Subject: [patch 006/101] knfsd: Fix a race in closing NFSd connections.
Date: Wed, 07 Mar 2007 09:10:41 -0800	[thread overview]
Message-ID: <20070307171136.733769722@mini.kroah.org> (raw)
In-Reply-To: 20070307171035.150802805@mini.kroah.org

[-- Attachment #1: knfsd-fix-a-race-in-closing-nfsd-connections.patch --]
[-- Type: text/plain, Size: 6430 bytes --]

If you lose this race, it can iput a socket inode twice and you
get a BUG in fs/inode.c

When I added the option for user-space to close a socket,
I added some cruft to svc_delete_socket so that I could call
that function when closing a socket per user-space request.

This was the wrong thing to do.  I should have just set SK_CLOSE
and let normal mechanisms do the work.

Not only wrong, but buggy.  The locking is all wrong and it openned
up a race where-by a socket could be closed twice.

So this patch:
  Introduces svc_close_socket which sets SK_CLOSE then either leave
  the close up to a thread, or calls svc_delete_socket if it can
  get SK_BUSY.

  Adds a bias to sk_busy which is removed when SK_DEAD is set,
  This avoid races around shutting down the socket.

  Changes several 'spin_lock' to 'spin_lock_bh' where the _bh 
  was missing.

Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>


---
 include/linux/sunrpc/svcsock.h |    2 -
 net/sunrpc/svc.c               |    4 +--
 net/sunrpc/svcsock.c           |   52 +++++++++++++++++++++++++++++------------
 3 files changed, 41 insertions(+), 17 deletions(-)

--- linux-2.6.20.1.orig/include/linux/sunrpc/svcsock.h
+++ linux-2.6.20.1/include/linux/sunrpc/svcsock.h
@@ -63,7 +63,7 @@ struct svc_sock {
  * Function prototypes.
  */
 int		svc_makesock(struct svc_serv *, int, unsigned short);
-void		svc_delete_socket(struct svc_sock *);
+void		svc_close_socket(struct svc_sock *);
 int		svc_recv(struct svc_rqst *, long);
 int		svc_send(struct svc_rqst *);
 void		svc_drop(struct svc_rqst *);
--- linux-2.6.20.1.orig/net/sunrpc/svc.c
+++ linux-2.6.20.1/net/sunrpc/svc.c
@@ -386,7 +386,7 @@ svc_destroy(struct svc_serv *serv)
 		svsk = list_entry(serv->sv_tempsocks.next,
 				  struct svc_sock,
 				  sk_list);
-		svc_delete_socket(svsk);
+		svc_close_socket(svsk);
 	}
 	if (serv->sv_shutdown)
 		serv->sv_shutdown(serv);
@@ -395,7 +395,7 @@ svc_destroy(struct svc_serv *serv)
 		svsk = list_entry(serv->sv_permsocks.next,
 				  struct svc_sock,
 				  sk_list);
-		svc_delete_socket(svsk);
+		svc_close_socket(svsk);
 	}
 	
 	cache_clean_deferred(serv);
--- linux-2.6.20.1.orig/net/sunrpc/svcsock.c
+++ linux-2.6.20.1/net/sunrpc/svcsock.c
@@ -62,6 +62,12 @@
  *		after a clear, the socket must be read/accepted
  *		 if this succeeds, it must be set again.
  *	SK_CLOSE can set at any time. It is never cleared.
+ *      sk_inuse contains a bias of '1' until SK_DEAD is set.
+ *             so when sk_inuse hits zero, we know the socket is dead
+ *             and no-one is using it.
+ *      SK_DEAD can only be set while SK_BUSY is held which ensures
+ *             no other thread will be using the socket or will try to
+ *	       set SK_DEAD.
  *
  */
 
@@ -70,6 +76,7 @@
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int *errp, int pmap_reg);
+static void		svc_delete_socket(struct svc_sock *svsk);
 static void		svc_udp_data_ready(struct sock *, int);
 static int		svc_udp_recvfrom(struct svc_rqst *);
 static int		svc_udp_sendto(struct svc_rqst *);
@@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp,
 static inline void
 svc_sock_put(struct svc_sock *svsk)
 {
-	if (atomic_dec_and_test(&svsk->sk_inuse) &&
-			test_bit(SK_DEAD, &svsk->sk_flags)) {
+	if (atomic_dec_and_test(&svsk->sk_inuse)) {
+		BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
+
 		dprintk("svc: releasing dead socket\n");
 		if (svsk->sk_sock->file)
 			sockfd_put(svsk->sk_sock);
@@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_ser
 
 	if (!serv)
 		return 0;
-	spin_lock(&serv->sv_lock);
+	spin_lock_bh(&serv->sv_lock);
 	list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
 		int onelen = one_sock_name(buf+len, svsk);
 		if (toclose && strcmp(toclose, buf+len) == 0)
@@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_ser
 		else
 			len += onelen;
 	}
-	spin_unlock(&serv->sv_lock);
+	spin_unlock_bh(&serv->sv_lock);
 	if (closesk)
 		/* Should unregister with portmap, but you cannot
 		 * unregister just one protocol...
 		 */
-		svc_delete_socket(closesk);
+		svc_close_socket(closesk);
 	else if (toclose)
 		return -ENOENT;
 	return len;
@@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
 		return svc_deferred_recv(rqstp);
 	}
 
+	if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+		svc_delete_socket(svsk);
+		return 0;
+	}
+
 	clear_bit(SK_DATA, &svsk->sk_flags);
 	while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
 		if (err == -EAGAIN) {
@@ -1176,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
 		       rqstp->rq_sock->sk_server->sv_name,
 		       (sent<0)?"got error":"sent only",
 		       sent, xbufp->len);
-		svc_delete_socket(rqstp->rq_sock);
+		set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+		svc_sock_enqueue(rqstp->rq_sock);
 		sent = -EAGAIN;
 	}
 	return sent;
@@ -1495,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv, 
 	svsk->sk_odata = inet->sk_data_ready;
 	svsk->sk_owspace = inet->sk_write_space;
 	svsk->sk_server = serv;
-	atomic_set(&svsk->sk_inuse, 0);
+	atomic_set(&svsk->sk_inuse, 1);
 	svsk->sk_lastrecv = get_seconds();
 	spin_lock_init(&svsk->sk_defer_lock);
 	INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1618,7 +1632,7 @@ bummer:
 /*
  * Remove a dead socket
  */
-void
+static void
 svc_delete_socket(struct svc_sock *svsk)
 {
 	struct svc_serv	*serv;
@@ -1644,16 +1658,26 @@ svc_delete_socket(struct svc_sock *svsk)
 	 * while still attached to a queue, the queue itself
 	 * is about to be destroyed (in svc_destroy).
 	 */
-	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+	if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+		BUG_ON(atomic_read(&svsk->sk_inuse)<2);
+		atomic_dec(&svsk->sk_inuse);
 		if (test_bit(SK_TEMP, &svsk->sk_flags))
 			serv->sv_tmpcnt--;
+	}
 
-	/* This atomic_inc should be needed - svc_delete_socket
-	 * should have the semantic of dropping a reference.
-	 * But it doesn't yet....
-	 */
-	atomic_inc(&svsk->sk_inuse);
 	spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_socket(struct svc_sock *svsk)
+{
+	set_bit(SK_CLOSE, &svsk->sk_flags);
+	if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+		/* someone else will have to effect the close */
+		return;
+
+	atomic_inc(&svsk->sk_inuse);
+	svc_delete_socket(svsk);
+	clear_bit(SK_BUSY, &svsk->sk_flags);
 	svc_sock_put(svsk);
 }
 

--

  parent reply	other threads:[~2007-03-07 17:59 UTC|newest]

Thread overview: 112+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-03-07 17:10 [patch 000/101] 2.6.20-stable review Greg KH
2007-03-07 17:10 ` [patch 001/101] ocfs2: ocfs2_link() journal credits update Greg KH
2007-03-07 17:10 ` [patch 002/101] x86_64: fix 2.6.18 regression - PTRACE_OLDSETOPTIONS should be accepted Greg KH
2007-03-07 17:10 ` [patch 003/101] rtc-pcf8563: detect polarity of century bit automatically Greg KH
2007-03-07 17:10 ` [patch 004/101] prism54: correct assignment of DOT1XENABLE in WE-19 codepaths Greg KH
2007-03-07 17:10 ` [patch 005/101] pata_amd: fix an obvious bug in cable detection Greg KH
2007-03-07 17:10 ` Greg KH [this message]
2007-03-07 17:10 ` [patch 007/101] Keys: Fix key serial number collision handling Greg KH
2007-03-07 17:10 ` [patch 008/101] ide: fix drive side 80c cable check Greg KH
2007-03-07 17:10 ` [patch 009/101] bcm43xx: Fix for oops on resume Greg KH
2007-03-07 17:10 ` [patch 010/101] bcm43xx: Fix for oops on ampdu status Greg KH
2007-03-07 17:10 ` [patch 011/101] AGP: intel-agp bugfix Greg KH
2007-03-07 17:10 ` [patch 012/101] Missing critical phys_to_virt in lib/swiotlb.c Greg KH
2007-03-07 17:10 ` [patch 013/101] USB: fix concurrent buffer access in the hub driver Greg KH
2007-03-07 17:10 ` [patch 014/101] USB audio fixes 1 Greg KH
2007-03-07 17:10 ` [patch 015/101] USB audio fixes 2 Greg KH
2007-03-07 17:10 ` [patch 016/101] hda-intel - Dont try to probe invalid codecs Greg KH
2007-03-07 17:10 ` [patch 017/101] Fix various bugs with aligned reads in RAID5 Greg KH
2007-03-07 17:10 ` [patch 018/101] Fix ATM initcall ordering Greg KH
2007-03-07 17:10 ` [patch 019/101] Fix TCP FIN handling Greg KH
2007-03-07 17:10 ` [patch 020/101] Fix allocation failure handling in multicast Greg KH
2007-03-07 17:10 ` [patch 021/101] md: Avoid possible BUG_ON in md bitmap handling Greg KH
2007-03-07 17:10 ` [patch 022/101] Fix compile error for e500 core based processors Greg KH
2007-03-07 17:10 ` [patch 023/101] ieee1394: video1394: DMA fix Greg KH
2007-03-07 17:10 ` [patch 024/101] ieee1394: fix host device registering when nodemgr disabled Greg KH
2007-03-07 17:11 ` [patch 025/101] Fix null pointer dereference in appledisplay driver Greg KH
2007-03-07 17:11 ` [patch 026/101] USB HID: Fix USB vendor and product IDs endianness for USB HID devices Greg KH
2007-03-07 17:11 ` [patch 027/101] Kconfig: FAULT_INJECTION can be selected only if LOCKDEP is enabled Greg KH
2007-03-08 12:17   ` Blaisorblade
2007-03-07 17:11 ` [patch 028/101] MTD: Fatal regression in drivers/mtd/redboot.c in 2.6.20 Greg KH
2007-03-07 17:11 ` [patch 029/101] IPV6: HASHTABLES: Use appropriate seed for caluculating ehash index Greg KH
2007-03-07 17:11 ` [patch 030/101] EHCI: turn off remote wakeup during shutdown Greg KH
2007-03-07 17:11 ` [patch 031/101] Avoid using nfsd process pools on SMP machines Greg KH
2007-03-07 17:11 ` [patch 032/101] Fix recently introduced problem with shutting down a busy NFS server Greg KH
2007-03-07 17:11 ` [patch 033/101] UHCI: fix port resume problem Greg KH
2007-03-07 17:11 ` [patch 034/101] Fix atmarp.h for userspace Greg KH
2007-03-07 17:11 ` [patch 035/101] Clear TCP segmentation offload state in ipt_REJECT Greg KH
2007-03-07 17:11 ` [patch 036/101] Fix IPX module unload Greg KH
2007-03-07 17:11 ` [patch 037/101] : Prevent pseudo garbage in SYNs advertized window Greg KH
2007-03-07 17:11 ` [patch 038/101] Fix oops in xfrm_audit_log() Greg KH
2007-03-07 17:11 ` [patch 039/101] sky2: dont flush good pause frames Greg KH
2007-03-07 17:11 ` [patch 040/101] sky2: transmit timeout deadlock Greg KH
2007-03-07 17:11 ` [patch 041/101] x86_64: Fix wrong gcc check in bitops.h Greg KH
2007-03-07 17:11 ` [patch 042/101] x86: Dont require the vDSO for handling a.out signals Greg KH
2007-03-07 17:11 ` [patch 043/101] i386: Fix broken CONFIG_COMPAT_VDSO on i386 Greg KH
2007-03-07 17:11 ` [patch 044/101] bcm43xx: fix for 4309 Greg KH
2007-03-07 17:11 ` [patch 045/101] md: Fix raid10 recovery problem Greg KH
2007-03-07 17:11 ` [patch 046/101] dvbdev: fix illegal re-usage of fileoperations struct Greg KH
2007-03-07 17:11 ` [patch 047/101] V4L: pvrusb2: Fix video corruption on stream start Greg KH
2007-03-07 17:11 ` [patch 048/101] V4L: pvrusb2: Handle larger cx2341x firmware images Greg KH
2007-03-07 17:11 ` [patch 049/101] DVB: cxusb: fix firmware patch for big endian systems Greg KH
2007-03-07 17:11 ` [patch 050/101] DVB: digitv: open nxt6000 i2c_gate for TDED4 tuner handling Greg KH
2007-03-07 17:11 ` [patch 051/101] V4L: fix cx25840 firmware loading Greg KH
2007-03-07 17:11 ` [patch 052/101] V4L: cx88-blackbird: allow usage of 376836 and 262144 sized firmware images Greg KH
2007-03-07 17:11 ` [patch 053/101] Fix posix-cpu-timer breakage caused by stale p->last_ran value Greg KH
2007-03-07 17:11 ` [patch 054/101] swsusp: Fix possible oops in userland interface Greg KH
2007-03-07 17:11 ` [patch 055/101] sata_sil: ignore and clear spurious IRQs while executing commands by polling Greg KH
2007-03-07 17:11 ` [patch 056/101] fix umask when noACL kernel meets extN tuned for ACLs Greg KH
2007-03-07 17:11 ` [patch 057/101] UML - Fix 2.6.20 hang Greg KH
2007-03-07 17:11 ` [patch 058/101] mmc: Power quirk for ENE controllers Greg KH
2007-03-07 17:11 ` [patch 059/101] bcm43xx: Fix assertion failures in interrupt handler Greg KH
2007-03-07 17:11 ` [patch 060/101] libata: add missing PM callbacks Greg KH
2007-03-07 17:11 ` [patch 061/101] libata: add missing CONFIG_PM in LLDs Greg KH
2007-03-07 17:11 ` [patch 062/101] POWERPC: Fix performance monitor exception Greg KH
2007-03-07 17:11 ` [patch 063/101] HID: fix possible double-free on error path in hid parser Greg KH
2007-03-07 17:11 ` [patch 064/101] Fix interrupt probing on E450 sparc64 systems Greg KH
2007-03-07 17:11 ` [patch 065/101] Fix xfrm_add_sa_expire() return value Greg KH
2007-03-07 17:11 ` [patch 066/101] Fix skb data reallocation handling in IPSEC Greg KH
2007-03-07 17:11 ` [patch 067/101] Fix %100 cpu spinning on sparc64 Greg KH
2007-03-07 17:11 ` [patch 068/101] Fix TCP MD5 locking Greg KH
2007-03-07 17:11 ` [patch 069/101] Dont add anycast reference to device multiple times Greg KH
2007-03-07 17:11 ` [patch 070/101] Fix anycast procfs device leak Greg KH
2007-03-07 17:11 ` [patch 071/101] Fix reference counting (memory leak) problem in __nfulnl_send() and callers related to packet queueing Greg KH
2007-03-07 17:11 ` [patch 072/101] export blk_recount_segments Greg KH
2007-03-07 17:11 ` [patch 073/101] forcedeth: disable msix Greg KH
2007-03-07 17:11 ` [patch 074/101] tty_io: fix race in master pty close/slave pty close path Greg KH
2007-03-07 17:11 ` [patch 075/101] sched: fix SMT scheduler bug Greg KH
2007-03-07 17:11 ` [patch 076/101] USB: usbnet driver bugfix Greg KH
2007-03-07 17:11 ` [patch 077/101] Backport of psmouse suspend/shutdown cleanups Greg KH
2007-03-07 17:11 ` [patch 078/101] Revert "LOG2: Alter get_order() so that it can make use of ilog2() on a constant" Greg KH
2007-03-07 17:11 ` [patch 079/101] RPM: fix double free in portmapper code Greg KH
2007-03-07 17:11 ` [patch 080/101] NLM: Fix double free in __nlm_async_call Greg KH
2007-03-07 17:11 ` [patch 081/101] kexec: Fix CONFIG_SMP=n compilation V2 (ia64) Greg KH
2007-03-07 17:11 ` [patch 082/101] Fix MTRR compat ioctl Greg KH
2007-03-07 17:11 ` [patch 083/101] ufs: restore back support of openstep Greg KH
2007-03-07 17:11 ` [patch 084/101] v9fs_vfs_mkdir(): fix a double free Greg KH
2007-03-07 17:12 ` [patch 085/101] enable mouse button 2+3 emulation for x86 macs Greg KH
2007-03-07 17:12 ` [patch 086/101] hugetlb: preserve hugetlb pte dirty state Greg KH
2007-03-07 17:12 ` [patch 087/101] m32r: build fix for processors without ISA_DSP_LEVEL2 Greg KH
2007-03-07 17:12 ` [patch 088/101] kernel/time/clocksource.c needs struct task_struct on m68k Greg KH
2007-03-07 17:12 ` [patch 089/101] buffer: memorder fix Greg KH
2007-03-07 17:12 ` [patch 090/101] Char: specialix, isr have 2 params Greg KH
2007-03-07 17:12 ` [patch 091/101] lockdep: forward declare struct task_struct Greg KH
2007-03-07 17:12 ` [patch 092/101] kvm: Fix asm constraint for lldt instruction Greg KH
2007-03-07 17:12 ` [patch 093/101] ueagle-atm.c needs sched.h Greg KH
2007-03-07 17:12 ` [patch 094/101] fix section mismatch warning in lockdep Greg KH
2007-03-07 17:12 ` [patch 095/101] throttle_vm_writeout(): dont loop on GFP_NOFS and GFP_NOIO allocations Greg KH
2007-03-07 17:12 ` [patch 096/101] bug in gdth.c crashing machine Greg KH
2007-03-07 17:12 ` [patch 097/101] revert "drivers/net/tulip/dmfe: support basic carrier detection" Greg KH
2007-03-07 18:14   ` Stephen Hemminger
2007-03-08  2:41     ` Dan Williams
2007-03-08  3:06       ` Linus Torvalds
2007-03-07 17:12 ` [patch 098/101] video/aty/mach64_ct.c: fix bogus delay loop Greg KH
2007-03-07 17:12 ` [patch 099/101] pktcdvd: Correctly set cmd_len field in pkt_generic_packet Greg KH
2007-03-07 17:12 ` [patch 100/101] ATA: convert GSI to irq on ia64 Greg KH
2007-03-07 17:12 ` [patch 101/101] gfs2: fix locking mistake Greg KH
2007-03-07 17:34 ` [stable] [patch 000/101] 2.6.20-stable review Greg KH
2007-03-07 23:12   ` [patch 102/101] TCP: Fix minisock tcp_create_openreq_child() typo Chris Wright
2007-03-07 23:12   ` [patch 103/101] Fix buffer overflow in Omnikey CardMan 4040 driver (CVE-2007-0005) Chris Wright
2007-03-07 23:13   ` [patch 104/101] x86-64: survive having no irq mapping for a vector Chris Wright
2007-03-07 18:41 ` [patch 000/101] 2.6.20-stable review Chuck Ebbert
2007-03-07 21:05   ` [stable] " Chris Wright

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070307171136.733769722@mini.kroah.org \
    --to=gregkh@suse.de \
    --cc=akpm@linux-foundation.org \
    --cc=akpm@osdl.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=cebbert@redhat.com \
    --cc=chuckw@quantumlinux.com \
    --cc=davej@redhat.com \
    --cc=jmforbes@linuxtx.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mkrufky@linuxtv.org \
    --cc=neilb@suse.de \
    --cc=nfs@lists.sourceforge.net \
    --cc=rdunlap@xenotime.net \
    --cc=reviews@ml.cw.f00f.org \
    --cc=stable@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=tytso@mit.edu \
    --cc=zwane@arm.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox