From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: Linus Torvalds <torvalds@osdl.org>, Andrew Morton <akpm@osdl.org>,
David Miller <davem@davemloft.net>,
Rik van Riel <riel@redhat.com>,
Daniel Phillips <phillips@google.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <trond.myklebust@fys.uio.no>
Subject: [PATCH 09/20] nfs: make swap on NFS robust
Date: Tue, 12 Sep 2006 17:25:49 +0200 [thread overview]
Message-ID: <20060912144903.985364000@chello.nl> (raw)
In-Reply-To: 20060912143049.278065000@chello.nl
[-- Attachment #1: nfs_vmio.patch --]
[-- Type: text/plain, Size: 5416 bytes --]
Provide a proper a_ops->swapfile() implementation for NFS. This sets the
NFS socket to SOCK_VMIO, runs socket reconnect under PF_MEMALLOC, and
sets SOCK_VMIO on the socket again before engaging the protocol ->connect() method.
PF_MEMALLOC should allow the allocation of struct socket and related objects
and the early (re)setting of SOCK_VMIO should allow us to receive the packets
required for the TCP connection buildup.
(Tested: swapping continues across a server reset during a large (4k) ping flood.)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <trond.myklebust@fys.uio.no>
---
fs/nfs/file.c | 2 -
include/linux/sunrpc/xprt.h | 5 +++-
net/sunrpc/sched.c | 4 +--
net/sunrpc/xprtsock.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 54 insertions(+), 4 deletions(-)
Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -323,7 +323,7 @@ static int nfs_release_page(struct page
static int nfs_swapfile(struct address_space *mapping, int enable)
{
- return 0;
+ return xs_swapper(NFS_CLIENT(mapping->host)->cl_xprt, enable);
}
const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/net/sunrpc/xprtsock.c
===================================================================
--- linux-2.6.orig/net/sunrpc/xprtsock.c
+++ linux-2.6/net/sunrpc/xprtsock.c
@@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
{
struct rpc_xprt *xprt = (struct rpc_xprt *) args;
struct socket *sock = xprt->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || xprt->addr.sin_port == 0)
@@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt);
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
/* Start by resetting any existing state */
xs_close(xprt);
@@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
xprt->sock = sock;
xprt->inet = sk;
+ if (xprt->swapper)
+ sk_set_vmio(sk);
+
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ current->flags = pflags;
}
/*
@@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
{
struct rpc_xprt *xprt = (struct rpc_xprt *)args;
struct socket *sock = xprt->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || xprt->addr.sin_port == 0)
goto out;
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt);
if (!xprt->sock) {
@@ -1148,6 +1160,10 @@ static void xs_tcp_connect_worker(void *
write_unlock_bh(&sk->sk_callback_lock);
}
+
+ if (xprt->swapper)
+ sk_set_vmio(xprt->inet);
+
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
@@ -1174,6 +1190,7 @@ out:
xprt_wake_pending_tasks(xprt, status);
out_clear:
xprt_clear_connecting(xprt);
+ current->flags = pflags;
}
/**
@@ -1369,3 +1386,33 @@ int xs_setup_tcp(struct rpc_xprt *xprt,
return 0;
}
+
+#define RPC_BUF_RESERVE_PAGES (1) /* XXX: how many concurrent rpc buffers? */
+#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+ int err = 0;
+
+ if (enable) {
+ /*
+ * keep one extra sock reference so the reserve won't dip
+ * when the socket gets reconnected.
+ */
+ sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
+ sk_set_vmio(xprt->inet);
+ xprt->swapper = 1;
+ } else if (xprt->swapper) {
+ xprt->swapper = 0;
+ sk_clear_vmio(xprt->inet);
+ sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
+ }
+
+ return err;
+}
Index: linux-2.6/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6/include/linux/sunrpc/xprt.h
@@ -147,7 +147,9 @@ struct rpc_xprt {
unsigned int max_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
- resvport : 1; /* use a reserved port */
+ resvport : 1, /* use a reserved port */
+ swapper : 1; /* we're swapping over this
+ transport */
/*
* XID
@@ -261,6 +263,7 @@ void xprt_disconnect(struct rpc_xprt *
*/
int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to);
int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
+int xs_swapper(struct rpc_xprt *xprt, int enable);
/*
* Reserved bit positions in xprt->state
Index: linux-2.6/net/sunrpc/sched.c
===================================================================
--- linux-2.6.orig/net/sunrpc/sched.c
+++ linux-2.6/net/sunrpc/sched.c
@@ -736,8 +736,8 @@ void * rpc_malloc(struct rpc_task *task,
struct rpc_rqst *req = task->tk_rqstp;
gfp_t gfp;
- if (task->tk_flags & RPC_TASK_SWAPPER)
- gfp = GFP_ATOMIC;
+ if (RPC_IS_SWAPPER(task))
+ gfp = GFP_ATOMIC | __GFP_EMERGENCY;
else
gfp = GFP_NOFS;
--
WARNING: multiple messages have this Message-ID (diff)
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
To: linux-mm@kvack.org, linux-kernel@vger.kernel.org, netdev@vger.kernel.org
Cc: Linus Torvalds <torvalds@osdl.org>, Andrew Morton <akpm@osdl.org>,
David Miller <davem@davemloft.net>,
Rik van Riel <riel@redhat.com>,
Daniel Phillips <phillips@google.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Trond Myklebust <trond.myklebust@fys.uio.no>
Subject: [PATCH 09/20] nfs: make swap on NFS robust
Date: Tue, 12 Sep 2006 17:25:49 +0200 [thread overview]
Message-ID: <20060912144903.985364000@chello.nl> (raw)
In-Reply-To: 20060912143049.278065000@chello.nl
[-- Attachment #1: nfs_vmio.patch --]
[-- Type: text/plain, Size: 5641 bytes --]
Provide a proper a_ops->swapfile() implementation for NFS. This sets the
NFS socket to SOCK_VMIO, runs socket reconnect under PF_MEMALLOC, and
sets SOCK_VMIO on the socket again before engaging the protocol ->connect() method.
PF_MEMALLOC should allow the allocation of struct socket and related objects
and the early (re)setting of SOCK_VMIO should allow us to receive the packets
required for the TCP connection buildup.
(Tested: swapping continues across a server reset during a large (4k) ping flood.)
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
CC: Trond Myklebust <trond.myklebust@fys.uio.no>
---
fs/nfs/file.c | 2 -
include/linux/sunrpc/xprt.h | 5 +++-
net/sunrpc/sched.c | 4 +--
net/sunrpc/xprtsock.c | 47 ++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 54 insertions(+), 4 deletions(-)
Index: linux-2.6/fs/nfs/file.c
===================================================================
--- linux-2.6.orig/fs/nfs/file.c
+++ linux-2.6/fs/nfs/file.c
@@ -323,7 +323,7 @@ static int nfs_release_page(struct page
static int nfs_swapfile(struct address_space *mapping, int enable)
{
- return 0;
+ return xs_swapper(NFS_CLIENT(mapping->host)->cl_xprt, enable);
}
const struct address_space_operations nfs_file_aops = {
Index: linux-2.6/net/sunrpc/xprtsock.c
===================================================================
--- linux-2.6.orig/net/sunrpc/xprtsock.c
+++ linux-2.6/net/sunrpc/xprtsock.c
@@ -1014,6 +1014,7 @@ static void xs_udp_connect_worker(void *
{
struct rpc_xprt *xprt = (struct rpc_xprt *) args;
struct socket *sock = xprt->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || xprt->addr.sin_port == 0)
@@ -1021,6 +1022,9 @@ static void xs_udp_connect_worker(void *
dprintk("RPC: xs_udp_connect_worker for xprt %p\n", xprt);
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
/* Start by resetting any existing state */
xs_close(xprt);
@@ -1054,6 +1058,9 @@ static void xs_udp_connect_worker(void *
xprt->sock = sock;
xprt->inet = sk;
+ if (xprt->swapper)
+ sk_set_vmio(sk);
+
write_unlock_bh(&sk->sk_callback_lock);
}
xs_udp_do_set_buffer_size(xprt);
@@ -1061,6 +1068,7 @@ static void xs_udp_connect_worker(void *
out:
xprt_wake_pending_tasks(xprt, status);
xprt_clear_connecting(xprt);
+ current->flags = pflags;
}
/*
@@ -1097,11 +1105,15 @@ static void xs_tcp_connect_worker(void *
{
struct rpc_xprt *xprt = (struct rpc_xprt *)args;
struct socket *sock = xprt->sock;
+ unsigned long pflags = current->flags;
int err, status = -EIO;
if (xprt->shutdown || xprt->addr.sin_port == 0)
goto out;
+ if (xprt->swapper)
+ current->flags |= PF_MEMALLOC;
+
dprintk("RPC: xs_tcp_connect_worker for xprt %p\n", xprt);
if (!xprt->sock) {
@@ -1148,6 +1160,10 @@ static void xs_tcp_connect_worker(void *
write_unlock_bh(&sk->sk_callback_lock);
}
+
+ if (xprt->swapper)
+ sk_set_vmio(xprt->inet);
+
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
@@ -1174,6 +1190,7 @@ out:
xprt_wake_pending_tasks(xprt, status);
out_clear:
xprt_clear_connecting(xprt);
+ current->flags = pflags;
}
/**
@@ -1369,3 +1386,33 @@ int xs_setup_tcp(struct rpc_xprt *xprt,
return 0;
}
+
+#define RPC_BUF_RESERVE_PAGES (1) /* XXX: how many concurrent rpc buffers? */
+#define RPC_RESERVE_PAGES (RPC_BUF_RESERVE_PAGES + TX_RESERVE_PAGES)
+
+/**
+ * xs_swapper - Tag this transport as being used for swap.
+ * @xprt: transport to tag
+ * @enable: enable/disable
+ *
+ */
+int xs_swapper(struct rpc_xprt *xprt, int enable)
+{
+ int err = 0;
+
+ if (enable) {
+ /*
+ * keep one extra sock reference so the reserve won't dip
+ * when the socket gets reconnected.
+ */
+ sk_adjust_memalloc(1, RPC_RESERVE_PAGES);
+ sk_set_vmio(xprt->inet);
+ xprt->swapper = 1;
+ } else if (xprt->swapper) {
+ xprt->swapper = 0;
+ sk_clear_vmio(xprt->inet);
+ sk_adjust_memalloc(-1, -RPC_RESERVE_PAGES);
+ }
+
+ return err;
+}
Index: linux-2.6/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6.orig/include/linux/sunrpc/xprt.h
+++ linux-2.6/include/linux/sunrpc/xprt.h
@@ -147,7 +147,9 @@ struct rpc_xprt {
unsigned int max_reqs; /* total slots */
unsigned long state; /* transport state */
unsigned char shutdown : 1, /* being shut down */
- resvport : 1; /* use a reserved port */
+ resvport : 1, /* use a reserved port */
+ swapper : 1; /* we're swapping over this
+ transport */
/*
* XID
@@ -261,6 +263,7 @@ void xprt_disconnect(struct rpc_xprt *
*/
int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to);
int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
+int xs_swapper(struct rpc_xprt *xprt, int enable);
/*
* Reserved bit positions in xprt->state
Index: linux-2.6/net/sunrpc/sched.c
===================================================================
--- linux-2.6.orig/net/sunrpc/sched.c
+++ linux-2.6/net/sunrpc/sched.c
@@ -736,8 +736,8 @@ void * rpc_malloc(struct rpc_task *task,
struct rpc_rqst *req = task->tk_rqstp;
gfp_t gfp;
- if (task->tk_flags & RPC_TASK_SWAPPER)
- gfp = GFP_ATOMIC;
+ if (RPC_IS_SWAPPER(task))
+ gfp = GFP_ATOMIC | __GFP_EMERGENCY;
else
gfp = GFP_NOFS;
--
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2006-09-12 15:51 UTC|newest]
Thread overview: 74+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-09-12 15:25 [PATCH 00/20] vm deadlock avoidance for NFS, NBD and iSCSI (take 7) Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 02/20] net: vm deadlock avoidance core Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 18/20] netlink: add SOCK_VMIO support to AF_NETLINK Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 01/20] mm: serialize access to min_free_kbytes Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra [this message]
2006-09-12 15:25 ` [PATCH 09/20] nfs: make swap on NFS robust Peter Zijlstra
2006-09-12 15:25 ` [PATCH 08/20] nfs: enable swap on NFS Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 15/20] iscsi: kernel side tcp connect Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 05/20] uml: rename arch/um remove_mapping() Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 06/20] nfs: teach the NFS client how to treat PG_swapcache pages Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 16/20] iscsi: add session context to ep_connect Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 13/20] nbd: use swapdev hook to make swap deadlock free Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 04/20] mm: methods for teaching filesystems about PG_swapcache pages Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 19/20] mm: a process flags to avoid blocking allocations Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 11/20] nbd: request_fn fixup Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 22:47 ` Jens Axboe
2006-09-12 22:47 ` Jens Axboe
2006-09-13 0:21 ` Jeff Garzik
2006-09-13 0:21 ` Jeff Garzik
2006-09-13 6:14 ` Jens Axboe
2006-09-13 6:14 ` Jens Axboe
2006-09-12 15:25 ` [PATCH 14/20] uml: enable scsi and add iscsi config Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 12/20] nbd: limit blk_queue Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 22:47 ` Jens Axboe
2006-09-12 22:47 ` Jens Axboe
2006-09-12 15:25 ` [PATCH 03/20] mm: add support for non block device backed swap files Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 07/20] nfs: add a comment explaining the use of PG_private in the NFS client Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 17/20] scsi: propagate the swapdev hook into the scsi stack Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 20/20] iscsi: support for swapping over iSCSI Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-13 20:50 ` Mike Christie
2006-09-13 20:50 ` Mike Christie
2006-09-14 6:17 ` Peter Zijlstra
2006-09-14 6:17 ` Peter Zijlstra
2006-09-14 19:22 ` Mike Christie
2006-09-14 19:22 ` Mike Christie
2006-09-14 20:35 ` Peter Zijlstra
2006-09-14 20:35 ` Peter Zijlstra
2006-09-14 20:46 ` Peter Zijlstra
2006-09-14 20:46 ` Peter Zijlstra
2006-09-14 21:09 ` Mike Christie
2006-09-14 21:09 ` Mike Christie
2006-09-14 21:28 ` Mike Christie
2006-09-14 21:28 ` Mike Christie
2006-09-14 21:00 ` Mike Christie
2006-09-14 21:00 ` Mike Christie
2006-09-14 21:03 ` Mike Christie
2006-09-14 21:03 ` Mike Christie
2006-09-14 21:18 ` Peter Zijlstra
2006-09-14 21:18 ` Peter Zijlstra
2006-09-12 15:25 ` [PATCH 10/20] mm: block device swap notification Peter Zijlstra
2006-09-12 15:25 ` Peter Zijlstra
2006-09-12 16:37 ` [PATCH 00/20] vm deadlock avoidance for NFS, NBD and iSCSI (take 7) Linus Torvalds
2006-09-12 16:37 ` Linus Torvalds
2006-09-12 23:58 ` Nate Diller
2006-09-12 23:58 ` Nate Diller
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20060912144903.985364000@chello.nl \
--to=a.p.zijlstra@chello.nl \
--cc=akpm@osdl.org \
--cc=davem@davemloft.net \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=netdev@vger.kernel.org \
--cc=phillips@google.com \
--cc=riel@redhat.com \
--cc=torvalds@osdl.org \
--cc=trond.myklebust@fys.uio.no \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.