From: Greg KH <gregkh@suse.de>
To: linux-kernel@vger.kernel.org, stable@kernel.org,
Andrew Morton <akpm@osdl.org>
Cc: Justin Forbes <jmforbes@linuxtx.org>,
Zwane Mwaikambo <zwane@arm.linux.org.uk>,
"Theodore Ts'o" <tytso@mit.edu>,
Randy Dunlap <rdunlap@xenotime.net>,
Dave Jones <davej@redhat.com>,
Chuck Wolber <chuckw@quantumlinux.com>,
Chris Wedgwood <reviews@ml.cw.f00f.org>,
Michael Krufky <mkrufky@linuxtv.org>,
Chuck Ebbert <cebbert@redhat.com>,
torvalds@linux-foundation.org, akpm@linux-foundation.org,
alan@lxorguk.ukuu.org.uk, nfs@lists.sourceforge.net,
Neil Brown <neilb@suse.de>
Subject: [patch 006/101] knfsd: Fix a race in closing NFSd connections.
Date: Wed, 07 Mar 2007 09:10:41 -0800 [thread overview]
Message-ID: <20070307171136.733769722@mini.kroah.org> (raw)
In-Reply-To: 20070307171035.150802805@mini.kroah.org
[-- Attachment #1: knfsd-fix-a-race-in-closing-nfsd-connections.patch --]
[-- Type: text/plain, Size: 6430 bytes --]
If you lose this race, it can iput a socket inode twice and you
get a BUG in fs/inode.c
When I added the option for user-space to close a socket,
I added some cruft to svc_delete_socket so that I could call
that function when closing a socket per user-space request.
This was the wrong thing to do. I should have just set SK_CLOSE
and let normal mechanisms do the work.
Not only wrong, but buggy. The locking is all wrong and it openned
up a race where-by a socket could be closed twice.
So this patch:
Introduces svc_close_socket which sets SK_CLOSE then either leave
the close up to a thread, or calls svc_delete_socket if it can
get SK_BUSY.
Adds a bias to sk_busy which is removed when SK_DEAD is set,
This avoid races around shutting down the socket.
Changes several 'spin_lock' to 'spin_lock_bh' where the _bh
was missing.
Bugzilla-url: http://bugzilla.kernel.org/show_bug.cgi?id=7916
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
include/linux/sunrpc/svcsock.h | 2 -
net/sunrpc/svc.c | 4 +--
net/sunrpc/svcsock.c | 52 +++++++++++++++++++++++++++++------------
3 files changed, 41 insertions(+), 17 deletions(-)
--- linux-2.6.20.1.orig/include/linux/sunrpc/svcsock.h
+++ linux-2.6.20.1/include/linux/sunrpc/svcsock.h
@@ -63,7 +63,7 @@ struct svc_sock {
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short);
-void svc_delete_socket(struct svc_sock *);
+void svc_close_socket(struct svc_sock *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
--- linux-2.6.20.1.orig/net/sunrpc/svc.c
+++ linux-2.6.20.1/net/sunrpc/svc.c
@@ -386,7 +386,7 @@ svc_destroy(struct svc_serv *serv)
svsk = list_entry(serv->sv_tempsocks.next,
struct svc_sock,
sk_list);
- svc_delete_socket(svsk);
+ svc_close_socket(svsk);
}
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
@@ -395,7 +395,7 @@ svc_destroy(struct svc_serv *serv)
svsk = list_entry(serv->sv_permsocks.next,
struct svc_sock,
sk_list);
- svc_delete_socket(svsk);
+ svc_close_socket(svsk);
}
cache_clean_deferred(serv);
--- linux-2.6.20.1.orig/net/sunrpc/svcsock.c
+++ linux-2.6.20.1/net/sunrpc/svcsock.c
@@ -62,6 +62,12 @@
* after a clear, the socket must be read/accepted
* if this succeeds, it must be set again.
* SK_CLOSE can set at any time. It is never cleared.
+ * sk_inuse contains a bias of '1' until SK_DEAD is set.
+ * so when sk_inuse hits zero, we know the socket is dead
+ * and no-one is using it.
+ * SK_DEAD can only be set while SK_BUSY is held which ensures
+ * no other thread will be using the socket or will try to
+ * set SK_DEAD.
*
*/
@@ -70,6 +76,7 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int *errp, int pmap_reg);
+static void svc_delete_socket(struct svc_sock *svsk);
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
@@ -329,8 +336,9 @@ void svc_reserve(struct svc_rqst *rqstp,
static inline void
svc_sock_put(struct svc_sock *svsk)
{
- if (atomic_dec_and_test(&svsk->sk_inuse) &&
- test_bit(SK_DEAD, &svsk->sk_flags)) {
+ if (atomic_dec_and_test(&svsk->sk_inuse)) {
+ BUG_ON(! test_bit(SK_DEAD, &svsk->sk_flags));
+
dprintk("svc: releasing dead socket\n");
if (svsk->sk_sock->file)
sockfd_put(svsk->sk_sock);
@@ -520,7 +528,7 @@ svc_sock_names(char *buf, struct svc_ser
if (!serv)
return 0;
- spin_lock(&serv->sv_lock);
+ spin_lock_bh(&serv->sv_lock);
list_for_each_entry(svsk, &serv->sv_permsocks, sk_list) {
int onelen = one_sock_name(buf+len, svsk);
if (toclose && strcmp(toclose, buf+len) == 0)
@@ -528,12 +536,12 @@ svc_sock_names(char *buf, struct svc_ser
else
len += onelen;
}
- spin_unlock(&serv->sv_lock);
+ spin_unlock_bh(&serv->sv_lock);
if (closesk)
/* Should unregister with portmap, but you cannot
* unregister just one protocol...
*/
- svc_delete_socket(closesk);
+ svc_close_socket(closesk);
else if (toclose)
return -ENOENT;
return len;
@@ -683,6 +691,11 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
return svc_deferred_recv(rqstp);
}
+ if (test_bit(SK_CLOSE, &svsk->sk_flags)) {
+ svc_delete_socket(svsk);
+ return 0;
+ }
+
clear_bit(SK_DATA, &svsk->sk_flags);
while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
@@ -1176,7 +1189,8 @@ svc_tcp_sendto(struct svc_rqst *rqstp)
rqstp->rq_sock->sk_server->sv_name,
(sent<0)?"got error":"sent only",
sent, xbufp->len);
- svc_delete_socket(rqstp->rq_sock);
+ set_bit(SK_CLOSE, &rqstp->rq_sock->sk_flags);
+ svc_sock_enqueue(rqstp->rq_sock);
sent = -EAGAIN;
}
return sent;
@@ -1495,7 +1509,7 @@ svc_setup_socket(struct svc_serv *serv,
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
svsk->sk_server = serv;
- atomic_set(&svsk->sk_inuse, 0);
+ atomic_set(&svsk->sk_inuse, 1);
svsk->sk_lastrecv = get_seconds();
spin_lock_init(&svsk->sk_defer_lock);
INIT_LIST_HEAD(&svsk->sk_deferred);
@@ -1618,7 +1632,7 @@ bummer:
/*
* Remove a dead socket
*/
-void
+static void
svc_delete_socket(struct svc_sock *svsk)
{
struct svc_serv *serv;
@@ -1644,16 +1658,26 @@ svc_delete_socket(struct svc_sock *svsk)
* while still attached to a queue, the queue itself
* is about to be destroyed (in svc_destroy).
*/
- if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags))
+ if (!test_and_set_bit(SK_DEAD, &svsk->sk_flags)) {
+ BUG_ON(atomic_read(&svsk->sk_inuse)<2);
+ atomic_dec(&svsk->sk_inuse);
if (test_bit(SK_TEMP, &svsk->sk_flags))
serv->sv_tmpcnt--;
+ }
- /* This atomic_inc should be needed - svc_delete_socket
- * should have the semantic of dropping a reference.
- * But it doesn't yet....
- */
- atomic_inc(&svsk->sk_inuse);
spin_unlock_bh(&serv->sv_lock);
+}
+
+void svc_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
+ /* someone else will have to effect the close */
+ return;
+
+ atomic_inc(&svsk->sk_inuse);
+ svc_delete_socket(svsk);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
svc_sock_put(svsk);
}
--
next prev parent reply other threads:[~2007-03-07 17:59 UTC|newest]
Thread overview: 112+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-03-07 17:10 [patch 000/101] 2.6.20-stable review Greg KH
2007-03-07 17:10 ` [patch 001/101] ocfs2: ocfs2_link() journal credits update Greg KH
2007-03-07 17:10 ` [patch 002/101] x86_64: fix 2.6.18 regression - PTRACE_OLDSETOPTIONS should be accepted Greg KH
2007-03-07 17:10 ` [patch 003/101] rtc-pcf8563: detect polarity of century bit automatically Greg KH
2007-03-07 17:10 ` [patch 004/101] prism54: correct assignment of DOT1XENABLE in WE-19 codepaths Greg KH
2007-03-07 17:10 ` [patch 005/101] pata_amd: fix an obvious bug in cable detection Greg KH
2007-03-07 17:10 ` Greg KH [this message]
2007-03-07 17:10 ` [patch 007/101] Keys: Fix key serial number collision handling Greg KH
2007-03-07 17:10 ` [patch 008/101] ide: fix drive side 80c cable check Greg KH
2007-03-07 17:10 ` [patch 009/101] bcm43xx: Fix for oops on resume Greg KH
2007-03-07 17:10 ` [patch 010/101] bcm43xx: Fix for oops on ampdu status Greg KH
2007-03-07 17:10 ` [patch 011/101] AGP: intel-agp bugfix Greg KH
2007-03-07 17:10 ` [patch 012/101] Missing critical phys_to_virt in lib/swiotlb.c Greg KH
2007-03-07 17:10 ` [patch 013/101] USB: fix concurrent buffer access in the hub driver Greg KH
2007-03-07 17:10 ` [patch 014/101] USB audio fixes 1 Greg KH
2007-03-07 17:10 ` [patch 015/101] USB audio fixes 2 Greg KH
2007-03-07 17:10 ` [patch 016/101] hda-intel - Dont try to probe invalid codecs Greg KH
2007-03-07 17:10 ` [patch 017/101] Fix various bugs with aligned reads in RAID5 Greg KH
2007-03-07 17:10 ` [patch 018/101] Fix ATM initcall ordering Greg KH
2007-03-07 17:10 ` [patch 019/101] Fix TCP FIN handling Greg KH
2007-03-07 17:10 ` [patch 020/101] Fix allocation failure handling in multicast Greg KH
2007-03-07 17:10 ` [patch 021/101] md: Avoid possible BUG_ON in md bitmap handling Greg KH
2007-03-07 17:10 ` [patch 022/101] Fix compile error for e500 core based processors Greg KH
2007-03-07 17:10 ` [patch 023/101] ieee1394: video1394: DMA fix Greg KH
2007-03-07 17:10 ` [patch 024/101] ieee1394: fix host device registering when nodemgr disabled Greg KH
2007-03-07 17:11 ` [patch 025/101] Fix null pointer dereference in appledisplay driver Greg KH
2007-03-07 17:11 ` [patch 026/101] USB HID: Fix USB vendor and product IDs endianness for USB HID devices Greg KH
2007-03-07 17:11 ` [patch 027/101] Kconfig: FAULT_INJECTION can be selected only if LOCKDEP is enabled Greg KH
2007-03-08 12:17 ` Blaisorblade
2007-03-07 17:11 ` [patch 028/101] MTD: Fatal regression in drivers/mtd/redboot.c in 2.6.20 Greg KH
2007-03-07 17:11 ` [patch 029/101] IPV6: HASHTABLES: Use appropriate seed for caluculating ehash index Greg KH
2007-03-07 17:11 ` [patch 030/101] EHCI: turn off remote wakeup during shutdown Greg KH
2007-03-07 17:11 ` [patch 031/101] Avoid using nfsd process pools on SMP machines Greg KH
2007-03-07 17:11 ` [patch 032/101] Fix recently introduced problem with shutting down a busy NFS server Greg KH
2007-03-07 17:11 ` [patch 033/101] UHCI: fix port resume problem Greg KH
2007-03-07 17:11 ` [patch 034/101] Fix atmarp.h for userspace Greg KH
2007-03-07 17:11 ` [patch 035/101] Clear TCP segmentation offload state in ipt_REJECT Greg KH
2007-03-07 17:11 ` [patch 036/101] Fix IPX module unload Greg KH
2007-03-07 17:11 ` [patch 037/101] : Prevent pseudo garbage in SYNs advertized window Greg KH
2007-03-07 17:11 ` [patch 038/101] Fix oops in xfrm_audit_log() Greg KH
2007-03-07 17:11 ` [patch 039/101] sky2: dont flush good pause frames Greg KH
2007-03-07 17:11 ` [patch 040/101] sky2: transmit timeout deadlock Greg KH
2007-03-07 17:11 ` [patch 041/101] x86_64: Fix wrong gcc check in bitops.h Greg KH
2007-03-07 17:11 ` [patch 042/101] x86: Dont require the vDSO for handling a.out signals Greg KH
2007-03-07 17:11 ` [patch 043/101] i386: Fix broken CONFIG_COMPAT_VDSO on i386 Greg KH
2007-03-07 17:11 ` [patch 044/101] bcm43xx: fix for 4309 Greg KH
2007-03-07 17:11 ` [patch 045/101] md: Fix raid10 recovery problem Greg KH
2007-03-07 17:11 ` [patch 046/101] dvbdev: fix illegal re-usage of fileoperations struct Greg KH
2007-03-07 17:11 ` [patch 047/101] V4L: pvrusb2: Fix video corruption on stream start Greg KH
2007-03-07 17:11 ` [patch 048/101] V4L: pvrusb2: Handle larger cx2341x firmware images Greg KH
2007-03-07 17:11 ` [patch 049/101] DVB: cxusb: fix firmware patch for big endian systems Greg KH
2007-03-07 17:11 ` [patch 050/101] DVB: digitv: open nxt6000 i2c_gate for TDED4 tuner handling Greg KH
2007-03-07 17:11 ` [patch 051/101] V4L: fix cx25840 firmware loading Greg KH
2007-03-07 17:11 ` [patch 052/101] V4L: cx88-blackbird: allow usage of 376836 and 262144 sized firmware images Greg KH
2007-03-07 17:11 ` [patch 053/101] Fix posix-cpu-timer breakage caused by stale p->last_ran value Greg KH
2007-03-07 17:11 ` [patch 054/101] swsusp: Fix possible oops in userland interface Greg KH
2007-03-07 17:11 ` [patch 055/101] sata_sil: ignore and clear spurious IRQs while executing commands by polling Greg KH
2007-03-07 17:11 ` [patch 056/101] fix umask when noACL kernel meets extN tuned for ACLs Greg KH
2007-03-07 17:11 ` [patch 057/101] UML - Fix 2.6.20 hang Greg KH
2007-03-07 17:11 ` [patch 058/101] mmc: Power quirk for ENE controllers Greg KH
2007-03-07 17:11 ` [patch 059/101] bcm43xx: Fix assertion failures in interrupt handler Greg KH
2007-03-07 17:11 ` [patch 060/101] libata: add missing PM callbacks Greg KH
2007-03-07 17:11 ` [patch 061/101] libata: add missing CONFIG_PM in LLDs Greg KH
2007-03-07 17:11 ` [patch 062/101] POWERPC: Fix performance monitor exception Greg KH
2007-03-07 17:11 ` [patch 063/101] HID: fix possible double-free on error path in hid parser Greg KH
2007-03-07 17:11 ` [patch 064/101] Fix interrupt probing on E450 sparc64 systems Greg KH
2007-03-07 17:11 ` [patch 065/101] Fix xfrm_add_sa_expire() return value Greg KH
2007-03-07 17:11 ` [patch 066/101] Fix skb data reallocation handling in IPSEC Greg KH
2007-03-07 17:11 ` [patch 067/101] Fix %100 cpu spinning on sparc64 Greg KH
2007-03-07 17:11 ` [patch 068/101] Fix TCP MD5 locking Greg KH
2007-03-07 17:11 ` [patch 069/101] Dont add anycast reference to device multiple times Greg KH
2007-03-07 17:11 ` [patch 070/101] Fix anycast procfs device leak Greg KH
2007-03-07 17:11 ` [patch 071/101] Fix reference counting (memory leak) problem in __nfulnl_send() and callers related to packet queueing Greg KH
2007-03-07 17:11 ` [patch 072/101] export blk_recount_segments Greg KH
2007-03-07 17:11 ` [patch 073/101] forcedeth: disable msix Greg KH
2007-03-07 17:11 ` [patch 074/101] tty_io: fix race in master pty close/slave pty close path Greg KH
2007-03-07 17:11 ` [patch 075/101] sched: fix SMT scheduler bug Greg KH
2007-03-07 17:11 ` [patch 076/101] USB: usbnet driver bugfix Greg KH
2007-03-07 17:11 ` [patch 077/101] Backport of psmouse suspend/shutdown cleanups Greg KH
2007-03-07 17:11 ` [patch 078/101] Revert "LOG2: Alter get_order() so that it can make use of ilog2() on a constant" Greg KH
2007-03-07 17:11 ` [patch 079/101] RPM: fix double free in portmapper code Greg KH
2007-03-07 17:11 ` [patch 080/101] NLM: Fix double free in __nlm_async_call Greg KH
2007-03-07 17:11 ` [patch 081/101] kexec: Fix CONFIG_SMP=n compilation V2 (ia64) Greg KH
2007-03-07 17:11 ` [patch 082/101] Fix MTRR compat ioctl Greg KH
2007-03-07 17:11 ` [patch 083/101] ufs: restore back support of openstep Greg KH
2007-03-07 17:11 ` [patch 084/101] v9fs_vfs_mkdir(): fix a double free Greg KH
2007-03-07 17:12 ` [patch 085/101] enable mouse button 2+3 emulation for x86 macs Greg KH
2007-03-07 17:12 ` [patch 086/101] hugetlb: preserve hugetlb pte dirty state Greg KH
2007-03-07 17:12 ` [patch 087/101] m32r: build fix for processors without ISA_DSP_LEVEL2 Greg KH
2007-03-07 17:12 ` [patch 088/101] kernel/time/clocksource.c needs struct task_struct on m68k Greg KH
2007-03-07 17:12 ` [patch 089/101] buffer: memorder fix Greg KH
2007-03-07 17:12 ` [patch 090/101] Char: specialix, isr have 2 params Greg KH
2007-03-07 17:12 ` [patch 091/101] lockdep: forward declare struct task_struct Greg KH
2007-03-07 17:12 ` [patch 092/101] kvm: Fix asm constraint for lldt instruction Greg KH
2007-03-07 17:12 ` [patch 093/101] ueagle-atm.c needs sched.h Greg KH
2007-03-07 17:12 ` [patch 094/101] fix section mismatch warning in lockdep Greg KH
2007-03-07 17:12 ` [patch 095/101] throttle_vm_writeout(): dont loop on GFP_NOFS and GFP_NOIO allocations Greg KH
2007-03-07 17:12 ` [patch 096/101] bug in gdth.c crashing machine Greg KH
2007-03-07 17:12 ` [patch 097/101] revert "drivers/net/tulip/dmfe: support basic carrier detection" Greg KH
2007-03-07 18:14 ` Stephen Hemminger
2007-03-08 2:41 ` Dan Williams
2007-03-08 3:06 ` Linus Torvalds
2007-03-07 17:12 ` [patch 098/101] video/aty/mach64_ct.c: fix bogus delay loop Greg KH
2007-03-07 17:12 ` [patch 099/101] pktcdvd: Correctly set cmd_len field in pkt_generic_packet Greg KH
2007-03-07 17:12 ` [patch 100/101] ATA: convert GSI to irq on ia64 Greg KH
2007-03-07 17:12 ` [patch 101/101] gfs2: fix locking mistake Greg KH
2007-03-07 17:34 ` [stable] [patch 000/101] 2.6.20-stable review Greg KH
2007-03-07 23:12 ` [patch 102/101] TCP: Fix minisock tcp_create_openreq_child() typo Chris Wright
2007-03-07 23:12 ` [patch 103/101] Fix buffer overflow in Omnikey CardMan 4040 driver (CVE-2007-0005) Chris Wright
2007-03-07 23:13 ` [patch 104/101] x86-64: survive having no irq mapping for a vector Chris Wright
2007-03-07 18:41 ` [patch 000/101] 2.6.20-stable review Chuck Ebbert
2007-03-07 21:05 ` [stable] " Chris Wright
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20070307171136.733769722@mini.kroah.org \
--to=gregkh@suse.de \
--cc=akpm@linux-foundation.org \
--cc=akpm@osdl.org \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=cebbert@redhat.com \
--cc=chuckw@quantumlinux.com \
--cc=davej@redhat.com \
--cc=jmforbes@linuxtx.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mkrufky@linuxtv.org \
--cc=neilb@suse.de \
--cc=nfs@lists.sourceforge.net \
--cc=rdunlap@xenotime.net \
--cc=reviews@ml.cw.f00f.org \
--cc=stable@kernel.org \
--cc=torvalds@linux-foundation.org \
--cc=tytso@mit.edu \
--cc=zwane@arm.linux.org.uk \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox