* Race condition in xprt_disconnect
@ 2004-04-02 8:32 Olaf Kirch
0 siblings, 0 replies; 2+ messages in thread
From: Olaf Kirch @ 2004-04-02 8:32 UTC (permalink / raw)
To: nfs
[-- Attachment #1: Type: text/plain, Size: 737 bytes --]
Hi,
it seems bad things can happen if two RPC processes call xprt_disconnect
at the same time. I have two bug reports of ppc machines oopsing
somewhere deep in some SELinux functions because inode->i_security
is NULL. inode->i_security is allocated when the inode is created,
and cleared when the inode is destroyed.
The stack looks something like
xprt_disconnect->sock_release->...->selinux_foofah
The only way I can see this happening is if sock_release is called twice,
and indeed we do not seem to protect against this. Please take a look
at the attached patch. It should prevent these oopses, but I'm not
entirely sure it's safe.
Thanks,
Olaf
--
Olaf Kirch | The Hardware Gods hate me.
okir@suse.de |
---------------+
[-- Attachment #2: sunrpc-disconnect-race --]
[-- Type: text/plain, Size: 2849 bytes --]
--- linux-2.6.4/net/sunrpc/xprt.c.fix 2004-04-01 18:51:50.000000000 +0200
+++ linux-2.6.4/net/sunrpc/xprt.c 2004-04-01 18:51:03.000000000 +0200
@@ -87,7 +87,7 @@
static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
struct rpc_timeout *to);
static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
-static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
+static int xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
#ifdef RPC_DEBUG_DATA
@@ -389,26 +389,33 @@
static void
xprt_close(struct rpc_xprt *xprt, int reconnecting)
{
- struct socket *sock = xprt->sock;
- struct sock *sk = xprt->inet;
-
- if (!sk)
- return;
-
- write_lock_bh(&sk->sk_callback_lock);
+ struct socket *sock;
+ struct sock *sk;
+
+ spin_lock_bh(&xprt->sock_lock);
+ if ((sk = xprt->inet) != NULL)
+ sock_hold(sk);
xprt->inet = NULL;
+
+ sock = xprt->sock;
xprt->sock = NULL;
+ spin_unlock_bh(&xprt->sock_lock);
- sk->sk_user_data = NULL;
- sk->sk_data_ready = xprt->old_data_ready;
- sk->sk_state_change = xprt->old_state_change;
- sk->sk_write_space = xprt->old_write_space;
- write_unlock_bh(&sk->sk_callback_lock);
+ if (sk != NULL) {
+ write_lock_bh(&sk->sk_callback_lock);
+ sk->sk_user_data = NULL;
+ sk->sk_data_ready = xprt->old_data_ready;
+ sk->sk_state_change = xprt->old_state_change;
+ sk->sk_write_space = xprt->old_write_space;
+ sk->sk_no_check = 0;
+ write_unlock_bh(&sk->sk_callback_lock);
+ sock_put(sk);
+ }
xprt_disconnect(xprt, reconnecting);
- sk->sk_no_check = 0;
- sock_release(sock);
+ if (sock)
+ sock_release(sock);
}
static void
@@ -481,7 +488,10 @@
goto out_err;
return;
}
- xprt_bind_socket(xprt, sock);
+ if (xprt_bind_socket(xprt, sock) < 0) {
+ sock_release(sock);
+ goto out_err;
+ }
xprt_sock_setbufsize(xprt);
if (!xprt->stream)
@@ -1520,13 +1530,17 @@
return err;
}
-static void
+static int
xprt_bind_socket(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock *sk = sock->sk;
+ int error = -EBUSY;
+ /* Can it happen that xprt->inet is non-null when we
+ * get here? If so we should indicate an error so
+ * that the caller can drop the newly created socket */
if (xprt->inet)
- return;
+ goto out;
write_lock_bh(&sk->sk_callback_lock);
sk->sk_user_data = xprt;
@@ -1545,13 +1559,19 @@
xprt_clear_connected(xprt);
}
sk->sk_write_space = xprt_write_space;
+ write_unlock_bh(&sk->sk_callback_lock);
/* Reset to new socket */
- xprt->sock = sock;
- xprt->inet = sk;
- write_unlock_bh(&sk->sk_callback_lock);
+ spin_lock_bh(&xprt->sock_lock);
+ if (xprt->inet == NULL) {
+ xprt->sock = sock;
+ xprt->inet = sk;
+ error = 0;
+ }
+ spin_unlock_bh(&xprt->sock_lock);
- return;
+out:
+ return error;
}
/*
^ permalink raw reply [flat|nested] 2+ messages in thread
* RE: Race condition in xprt_disconnect
@ 2004-04-05 20:39 Trond Myklebust
0 siblings, 0 replies; 2+ messages in thread
From: Trond Myklebust @ 2004-04-05 20:39 UTC (permalink / raw)
To: Olaf Kirch; +Cc: nfs
[-- Attachment #1: Type: text/plain, Size: 1603 bytes --]
it seems bad things can happen if two RPC processes call
xprt_disconnect at the same time. I have two bug reports of ppc
machines oopsing somewhere deep in some SELinux functions
because inode->i_security is NULL. inode->i_security is
allocated when the inode is created, and cleared when the inode
is destroyed.
The stack looks something like
xprt_disconnect->sock_release->...->selinux_foofah
The only way I can see this happening is if sock_release is
called twice, and indeed we do not seem to protect against this.
Please take a look at the attached patch. It should prevent
these oopses, but I'm not entirely sure it's safe.
I initially thought that patch you sent out was correct, however the
more I think about it, the more I'm convinced it isn't sufficient.
The only way I see that we can have duplicate calls to sock_release() is
if we are scheduling xprt_socket_autoclose() or xprt_socket_connect()
more than once.
Normally xprt_lock_write() should protect us against this, and so AFAICS
any duplicates must imply that xprt_lock_write() is getting lost before
xprt_socket_connect() has had a chance to run. Presumably this is
occurring because xprt->snd_task is timing out and then being woken up
from xprt->pending.
Note that in that case we don't actually *want* to schedule a new
connect(), since the problem is not that the networking operation timed
out, but rather that keventd is being too slow...
The following patch should hopefully protect against this problem.
Cheers,
Trond
[-- Attachment #2: linux-2.6.5-34-sunrpc_disconnect.dif --]
[-- Type: text/plain, Size: 4253 bytes --]
include/linux/sunrpc/xprt.h | 5 ++-
net/sunrpc/xprt.c | 58 ++++++++++++++++++--------------------------
2 files changed, 27 insertions(+), 36 deletions(-)
diff -u --recursive --new-file --show-c-function linux-2.6.5-33-grace/include/linux/sunrpc/xprt.h linux-2.6.5-34-sunrpc_disconnect/include/linux/sunrpc/xprt.h
--- linux-2.6.5-33-grace/include/linux/sunrpc/xprt.h 2004-04-05 13:24:48.000000000 -0400
+++ linux-2.6.5-34-sunrpc_disconnect/include/linux/sunrpc/xprt.h 2004-04-05 14:49:48.000000000 -0400
@@ -216,8 +216,9 @@ void xprt_connect(struct rpc_task *);
int xprt_clear_backlog(struct rpc_xprt *);
void xprt_sock_setbufsize(struct rpc_xprt *);
-#define XPRT_CONNECT 0
-#define XPRT_LOCKED 1
+#define XPRT_LOCKED 0
+#define XPRT_CONNECT 1
+#define XPRT_CONNECTING 2
#define xprt_connected(xp) (test_bit(XPRT_CONNECT, &(xp)->sockstate))
#define xprt_set_connected(xp) (set_bit(XPRT_CONNECT, &(xp)->sockstate))
diff -u --recursive --new-file --show-c-function linux-2.6.5-33-grace/net/sunrpc/xprt.c linux-2.6.5-34-sunrpc_disconnect/net/sunrpc/xprt.c
--- linux-2.6.5-33-grace/net/sunrpc/xprt.c 2004-04-05 13:24:58.000000000 -0400
+++ linux-2.6.5-34-sunrpc_disconnect/net/sunrpc/xprt.c 2004-04-05 15:27:42.000000000 -0400
@@ -448,7 +448,10 @@ xprt_init_autodisconnect(unsigned long d
goto out_abort;
spin_unlock(&xprt->sock_lock);
/* Let keventd close the socket */
- schedule_work(&xprt->task_cleanup);
+ if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0)
+ xprt_release_write(xprt, NULL);
+ else
+ schedule_work(&xprt->task_cleanup);
return;
out_abort:
spin_unlock(&xprt->sock_lock);
@@ -460,12 +463,8 @@ static void xprt_socket_connect(void *ar
struct socket *sock = xprt->sock;
int status = -EIO;
- if (xprt->shutdown) {
- rpc_wake_up_status(&xprt->pending, -EIO);
- return;
- }
- if (!xprt->addr.sin_port)
- goto out_err;
+ if (xprt->shutdown || xprt->addr.sin_port == 0)
+ goto out;
/*
* Start by resetting any existing state
@@ -475,12 +474,12 @@ static void xprt_socket_connect(void *ar
if (sock == NULL) {
/* couldn't create socket or bind to reserved port;
* this is likely a permanent error, so cause an abort */
- goto out_err;
- return;
+ goto out;
}
xprt_bind_socket(xprt, sock);
xprt_sock_setbufsize(xprt);
+ status = 0;
if (!xprt->stream)
goto out;
@@ -491,29 +490,22 @@ static void xprt_socket_connect(void *ar
sizeof(xprt->addr), O_NONBLOCK);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
- if (status >= 0)
- goto out;
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- return;
- default:
- goto out_err;
+ if (status < 0) {
+ switch (status) {
+ case -EINPROGRESS:
+ case -EALREADY:
+ goto out_clear;
+ }
}
out:
- spin_lock_bh(&xprt->sock_lock);
- if (xprt->snd_task)
- rpc_wake_up_task(xprt->snd_task);
- spin_unlock_bh(&xprt->sock_lock);
- return;
-out_err:
- spin_lock_bh(&xprt->sock_lock);
- if (xprt->snd_task) {
- xprt->snd_task->tk_status = status;
- rpc_wake_up_task(xprt->snd_task);
- } else
- rpc_wake_up_status(&xprt->pending, -ENOTCONN);
- spin_unlock_bh(&xprt->sock_lock);
+ if (status < 0)
+ rpc_wake_up_status(&xprt->pending, status);
+ else
+ rpc_wake_up(&xprt->pending);
+out_clear:
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_CONNECTING, &xprt->sockstate);
+ smp_mb__after_clear_bit();
}
/*
@@ -545,7 +537,8 @@ void xprt_connect(struct rpc_task *task)
task->tk_timeout = RPC_CONNECT_TIMEOUT;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
- schedule_work(&xprt->sock_connect);
+ if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate))
+ schedule_work(&xprt->sock_connect);
return;
out_write:
xprt_release_write(xprt, task);
@@ -1027,9 +1020,6 @@ tcp_state_change(struct sock *sk)
xprt->tcp_reclen = 0;
xprt->tcp_copied = 0;
xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
-
- if (xprt->snd_task)
- rpc_wake_up_task(xprt->snd_task);
rpc_wake_up(&xprt->pending);
}
spin_unlock_bh(&xprt->sock_lock);
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2004-04-05 20:39 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-04-02 8:32 Race condition in xprt_disconnect Olaf Kirch
-- strict thread matches above, loose matches on Subject: below --
2004-04-05 20:39 Trond Myklebust
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.