* [PATCH 000 of 4] knfsd: revised patches to fix IPv6/sunrpc link error and other fixes
@ 2007-03-06 2:15 ` NeilBrown
0 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
Revised version of previous patches:
- Fixed the 'nfsd hang when stopped' problem
- Dropped the 'avoid checking udp checksum' patch, as I'm no longer convinced it is
the right way to go.
- Added a paranoia check that the control message returned by recvfrom
is in fact a PKTINFO message.
Tested that a UDP mount still works, and that the nfsd server can be stopped and
restarted successfully. Found that stopping it still failed if there was active
traffic, so fixed that.
Also patch from Greg Banks to make the thread-pool stuff non-default as
it is not optimal on all machines, and often needs tuning where it can
be optimal.
Thanks,
NeilBrown
[PATCH 001 of 4] knfsd: Use recv_msg to get peer address for NFSD instead of code-copying
[PATCH 002 of 4] knfsd: Remove CONFIG_IPV6 ifdefs from sunrpc server code.
[PATCH 003 of 4] knfsd: Fix recently introduced problem with shutting down a busy NFS server.
[PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 001 of 4] knfsd: Use recv_msg to get peer address for NFSD instead of code-copying
2007-03-06 2:15 ` NeilBrown
(?)
@ 2007-03-06 2:15 ` NeilBrown
-1 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
The sunrpc server code needs to know the source and destination address
for UDP packets so it can reply properly.
It currently copies code out of the network stack to pick the pieces out
of the skb.
This is ugly and causes compile problems with the IPv6 stuff.
So, rip that out and use recv_msg instead. This is a much cleaner
interface, but has a slight cost in that the checksum is now checked
before the copy, so we don't benefit from doing both at the same time.
This can probably be fixed.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./net/sunrpc/svcsock.c | 72 +++++++++++++++++++++++++++----------------------
1 file changed, 40 insertions(+), 32 deletions(-)
diff .prev/net/sunrpc/svcsock.c ./net/sunrpc/svcsock.c
--- .prev/net/sunrpc/svcsock.c 2007-03-06 11:02:55.000000000 +1100
+++ ./net/sunrpc/svcsock.c 2007-03-06 11:10:09.000000000 +1100
@@ -721,45 +721,23 @@ svc_write_space(struct sock *sk)
}
}
-static void svc_udp_get_sender_address(struct svc_rqst *rqstp,
- struct sk_buff *skb)
+static inline void svc_udp_get_dest_address(struct svc_rqst *rqstp,
+ struct cmsghdr *cmh)
{
switch (rqstp->rq_sock->sk_sk->sk_family) {
case AF_INET: {
- /* this seems to come from net/ipv4/udp.c:udp_recvmsg */
- struct sockaddr_in *sin = svc_addr_in(rqstp);
-
- sin->sin_family = AF_INET;
- sin->sin_port = skb->h.uh->source;
- sin->sin_addr.s_addr = skb->nh.iph->saddr;
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- rqstp->rq_daddr.addr.s_addr = skb->nh.iph->daddr;
- }
+ struct in_pktinfo *pki = CMSG_DATA(cmh);
+ rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
+ }
#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6: {
- /* this is derived from net/ipv6/udp.c:udpv6_recvmesg */
- struct sockaddr_in6 *sin6 = svc_addr_in6(rqstp);
-
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = skb->h.uh->source;
- sin6->sin6_flowinfo = 0;
- sin6->sin6_scope_id = 0;
- if (ipv6_addr_type(&sin6->sin6_addr) &
- IPV6_ADDR_LINKLOCAL)
- sin6->sin6_scope_id = IP6CB(skb)->iif;
- ipv6_addr_copy(&sin6->sin6_addr,
- &skb->nh.ipv6h->saddr);
- rqstp->rq_addrlen = sizeof(struct sockaddr_in);
- /* Remember which interface received this request */
- ipv6_addr_copy(&rqstp->rq_daddr.addr6,
- &skb->nh.ipv6h->saddr);
- }
+ struct in6_pktinfo *pki = CMSG_DATA(cmh);
+ ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
+ }
#endif
}
- return;
}
/*
@@ -771,7 +749,15 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
struct svc_sock *svsk = rqstp->rq_sock;
struct svc_serv *serv = svsk->sk_server;
struct sk_buff *skb;
+ char buffer[CMSG_SPACE(sizeof(union svc_pktinfo_u))];
+ struct cmsghdr *cmh = (struct cmsghdr *)buffer;
int err, len;
+ struct msghdr msg = {
+ .msg_name = svc_addr(rqstp),
+ .msg_control = cmh,
+ .msg_controllen = sizeof(buffer),
+ .msg_flags = MSG_DONTWAIT,
+ };
if (test_and_clear_bit(SK_CHNGBUF, &svsk->sk_flags))
/* udp sockets need large rcvbuf as all pending
@@ -797,7 +783,9 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
}
clear_bit(SK_DATA, &svsk->sk_flags);
- while ((skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
+ while ((err = kernel_recvmsg(svsk->sk_sock, &msg, NULL,
+ 0, 0, MSG_PEEK | MSG_DONTWAIT)) < 0 ||
+ (skb = skb_recv_datagram(svsk->sk_sk, 0, 1, &err)) == NULL) {
if (err == -EAGAIN) {
svc_sock_received(svsk);
return err;
@@ -805,6 +793,7 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
/* possibly an icmp error */
dprintk("svc: recvfrom returned error %d\n", -err);
}
+ rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
if (skb->tstamp.off_sec == 0) {
struct timeval tv;
@@ -827,7 +816,16 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_prot = IPPROTO_UDP;
- svc_udp_get_sender_address(rqstp, skb);
+ if (cmh->cmsg_level != IPPROTO_IP ||
+ cmh->cmsg_type != IP_PKTINFO) {
+ if (net_ratelimit())
+ printk("rpcsvc: received unknown control message:"
+ "%d/%d\n",
+ cmh->cmsg_level, cmh->cmsg_type);
+ skb_free_datagram(svsk->sk_sk, skb);
+ return 0;
+ }
+ svc_udp_get_dest_address(rqstp, cmh);
if (skb_is_nonlinear(skb)) {
/* we have to copy */
@@ -884,6 +882,9 @@ svc_udp_sendto(struct svc_rqst *rqstp)
static void
svc_udp_init(struct svc_sock *svsk)
{
+ int one = 1;
+ mm_segment_t oldfs;
+
svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
svsk->sk_recvfrom = svc_udp_recvfrom;
@@ -899,6 +900,13 @@ svc_udp_init(struct svc_sock *svsk)
set_bit(SK_DATA, &svsk->sk_flags); /* might have come in before data_ready set up */
set_bit(SK_CHNGBUF, &svsk->sk_flags);
+
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ /* make sure we get destination address info */
+ svsk->sk_sock->ops->setsockopt(svsk->sk_sock, IPPROTO_IP, IP_PKTINFO,
+ (char __user *)&one, sizeof(one));
+ set_fs(oldfs);
}
/*
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 002 of 4] knfsd: Remove CONFIG_IPV6 ifdefs from sunrpc server code.
2007-03-06 2:15 ` NeilBrown
@ 2007-03-06 2:15 ` NeilBrown
-1 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
They don't really save that much, and aren't worth the hassle.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./include/linux/sunrpc/svc.h | 2 --
./net/sunrpc/svcsock.c | 13 +++----------
2 files changed, 3 insertions(+), 12 deletions(-)
diff .prev/include/linux/sunrpc/svc.h ./include/linux/sunrpc/svc.h
--- .prev/include/linux/sunrpc/svc.h 2007-03-06 11:02:46.000000000 +1100
+++ ./include/linux/sunrpc/svc.h 2007-03-06 11:12:40.000000000 +1100
@@ -194,9 +194,7 @@ static inline void svc_putu32(struct kve
union svc_addr_u {
struct in_addr addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr addr6;
-#endif
};
/*
diff .prev/net/sunrpc/svcsock.c ./net/sunrpc/svcsock.c
--- .prev/net/sunrpc/svcsock.c 2007-03-06 11:10:09.000000000 +1100
+++ ./net/sunrpc/svcsock.c 2007-03-06 11:12:40.000000000 +1100
@@ -131,13 +131,13 @@ static char *__svc_print_addr(struct soc
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
htons(((struct sockaddr_in *) addr)->sin_port));
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
htons(((struct sockaddr_in6 *) addr)->sin6_port));
break;
-#endif
+
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
@@ -449,9 +449,7 @@ svc_wake_up(struct svc_serv *serv)
union svc_pktinfo_u {
struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_pktinfo pkti6;
-#endif
};
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +465,7 @@ static void svc_set_cmsg_data(struct svc
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
@@ -479,7 +477,6 @@ static void svc_set_cmsg_data(struct svc
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#endif
}
return;
}
@@ -730,13 +727,11 @@ static inline void svc_udp_get_dest_addr
rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
}
-#endif
}
}
@@ -985,11 +980,9 @@ static inline int svc_port_is_privileged
case AF_INET:
return ntohs(((struct sockaddr_in *)sin)->sin_port)
< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
< PROT_SOCK;
-#endif
default:
return 0;
}
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 002 of 4] knfsd: Remove CONFIG_IPV6 ifdefs from sunrpc server code.
@ 2007-03-06 2:15 ` NeilBrown
0 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
They don't really save that much, and aren't worth the hassle.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./include/linux/sunrpc/svc.h | 2 --
./net/sunrpc/svcsock.c | 13 +++----------
2 files changed, 3 insertions(+), 12 deletions(-)
diff .prev/include/linux/sunrpc/svc.h ./include/linux/sunrpc/svc.h
--- .prev/include/linux/sunrpc/svc.h 2007-03-06 11:02:46.000000000 +1100
+++ ./include/linux/sunrpc/svc.h 2007-03-06 11:12:40.000000000 +1100
@@ -194,9 +194,7 @@ static inline void svc_putu32(struct kve
union svc_addr_u {
struct in_addr addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_addr addr6;
-#endif
};
/*
diff .prev/net/sunrpc/svcsock.c ./net/sunrpc/svcsock.c
--- .prev/net/sunrpc/svcsock.c 2007-03-06 11:10:09.000000000 +1100
+++ ./net/sunrpc/svcsock.c 2007-03-06 11:12:40.000000000 +1100
@@ -131,13 +131,13 @@ static char *__svc_print_addr(struct soc
NIPQUAD(((struct sockaddr_in *) addr)->sin_addr),
htons(((struct sockaddr_in *) addr)->sin_port));
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6:
snprintf(buf, len, "%x:%x:%x:%x:%x:%x:%x:%x, port=%u",
NIP6(((struct sockaddr_in6 *) addr)->sin6_addr),
htons(((struct sockaddr_in6 *) addr)->sin6_port));
break;
-#endif
+
default:
snprintf(buf, len, "unknown address type: %d", addr->sa_family);
break;
@@ -449,9 +449,7 @@ svc_wake_up(struct svc_serv *serv)
union svc_pktinfo_u {
struct in_pktinfo pkti;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
struct in6_pktinfo pkti6;
-#endif
};
static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
@@ -467,7 +465,7 @@ static void svc_set_cmsg_data(struct svc
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
@@ -479,7 +477,6 @@ static void svc_set_cmsg_data(struct svc
cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
}
break;
-#endif
}
return;
}
@@ -730,13 +727,11 @@ static inline void svc_udp_get_dest_addr
rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr;
break;
}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6: {
struct in6_pktinfo *pki = CMSG_DATA(cmh);
ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr);
break;
}
-#endif
}
}
@@ -985,11 +980,9 @@ static inline int svc_port_is_privileged
case AF_INET:
return ntohs(((struct sockaddr_in *)sin)->sin_port)
< PROT_SOCK;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
case AF_INET6:
return ntohs(((struct sockaddr_in6 *)sin)->sin6_port)
< PROT_SOCK;
-#endif
default:
return 0;
}
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 003 of 4] knfsd: Fix recently introduced problem with shutting down a busy NFS server.
2007-03-06 2:15 ` NeilBrown
@ 2007-03-06 2:15 ` NeilBrown
-1 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
When the last thread of nfsd exits, it shuts down all related sockets.
It currently uses svc_close_socket to do this, but that is only
immediately effective if the socket is not SK_BUSY.
If the socket is busy - i.e. if a request has arrived that has not yet
been processed - svc_close_socket is not effective and the shutdown
process spins.
So create a new svc_force_close_socket which clears the SK_BUSY flag
if it is set and then calls svc_close_socket.
Also change some open-coded loops in svc_destroy to use
list_for_each_entry_safe.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./include/linux/sunrpc/svcsock.h | 2 +-
./net/sunrpc/svc.c | 21 +++++++++------------
./net/sunrpc/svcsock.c | 16 +++++++++++++++-
3 files changed, 25 insertions(+), 14 deletions(-)
diff .prev/include/linux/sunrpc/svcsock.h ./include/linux/sunrpc/svcsock.h
--- .prev/include/linux/sunrpc/svcsock.h 2007-03-06 12:19:18.000000000 +1100
+++ ./include/linux/sunrpc/svcsock.h 2007-03-06 12:19:23.000000000 +1100
@@ -66,7 +66,7 @@ struct svc_sock {
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short, int flags);
-void svc_close_socket(struct svc_sock *);
+void svc_force_close_socket(struct svc_sock *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 12:02:18.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 12:27:12.000000000 +1100
@@ -367,6 +367,7 @@ void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
+ struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
@@ -382,21 +383,17 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
- while (!list_empty(&serv->sv_tempsocks)) {
- svsk = list_entry(serv->sv_tempsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+ svc_force_close_socket(svsk);
+
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
- while (!list_empty(&serv->sv_permsocks)) {
- svsk = list_entry(serv->sv_permsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+ svc_force_close_socket(svsk);
+
+ BUG_ON(!list_empty(&serv->sv_permsocks));
+ BUG_ON(!list_empty(&serv->sv_tempsocks));
cache_clean_deferred(serv);
diff .prev/net/sunrpc/svcsock.c ./net/sunrpc/svcsock.c
--- .prev/net/sunrpc/svcsock.c 2007-03-06 11:12:40.000000000 +1100
+++ ./net/sunrpc/svcsock.c 2007-03-06 12:36:20.000000000 +1100
@@ -82,6 +82,7 @@ static void svc_delete_socket(struct sv
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
+static void svc_close_socket(struct svc_sock *svsk);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -1787,7 +1788,7 @@ svc_delete_socket(struct svc_sock *svsk)
spin_unlock_bh(&serv->sv_lock);
}
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
{
set_bit(SK_CLOSE, &svsk->sk_flags);
if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1800,6 +1801,19 @@ void svc_close_socket(struct svc_sock *s
svc_sock_put(svsk);
}
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Waiting to be processed, but no threads left,
+ * So just remove it from the waiting list
+ */
+ list_del_init(&svsk->sk_ready);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
+ }
+ svc_close_socket(svsk);
+}
+
/**
* svc_makesock - Make a socket for nfsd and lockd
* @serv: RPC server structure
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 003 of 4] knfsd: Fix recently introduced problem with shutting down a busy NFS server.
@ 2007-03-06 2:15 ` NeilBrown
0 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
When the last thread of nfsd exits, it shuts down all related sockets.
It currently uses svc_close_socket to do this, but that is only
immediately effective if the socket is not SK_BUSY.
If the socket is busy - i.e. if a request has arrived that has not yet
been processed - svc_close_socket is not effective and the shutdown
process spins.
So create a new svc_force_close_socket which clears the SK_BUSY flag
if it is set and then calls svc_close_socket.
Also change some open-coded loops in svc_destroy to use
list_for_each_entry_safe.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./include/linux/sunrpc/svcsock.h | 2 +-
./net/sunrpc/svc.c | 21 +++++++++------------
./net/sunrpc/svcsock.c | 16 +++++++++++++++-
3 files changed, 25 insertions(+), 14 deletions(-)
diff .prev/include/linux/sunrpc/svcsock.h ./include/linux/sunrpc/svcsock.h
--- .prev/include/linux/sunrpc/svcsock.h 2007-03-06 12:19:18.000000000 +1100
+++ ./include/linux/sunrpc/svcsock.h 2007-03-06 12:19:23.000000000 +1100
@@ -66,7 +66,7 @@ struct svc_sock {
* Function prototypes.
*/
int svc_makesock(struct svc_serv *, int, unsigned short, int flags);
-void svc_close_socket(struct svc_sock *);
+void svc_force_close_socket(struct svc_sock *);
int svc_recv(struct svc_rqst *, long);
int svc_send(struct svc_rqst *);
void svc_drop(struct svc_rqst *);
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 12:02:18.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 12:27:12.000000000 +1100
@@ -367,6 +367,7 @@ void
svc_destroy(struct svc_serv *serv)
{
struct svc_sock *svsk;
+ struct svc_sock *tmp;
dprintk("svc: svc_destroy(%s, %d)\n",
serv->sv_program->pg_name,
@@ -382,21 +383,17 @@ svc_destroy(struct svc_serv *serv)
del_timer_sync(&serv->sv_temptimer);
- while (!list_empty(&serv->sv_tempsocks)) {
- svsk = list_entry(serv->sv_tempsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_tempsocks, sk_list)
+ svc_force_close_socket(svsk);
+
if (serv->sv_shutdown)
serv->sv_shutdown(serv);
- while (!list_empty(&serv->sv_permsocks)) {
- svsk = list_entry(serv->sv_permsocks.next,
- struct svc_sock,
- sk_list);
- svc_close_socket(svsk);
- }
+ list_for_each_entry_safe(svsk, tmp, &serv->sv_permsocks, sk_list)
+ svc_force_close_socket(svsk);
+
+ BUG_ON(!list_empty(&serv->sv_permsocks));
+ BUG_ON(!list_empty(&serv->sv_tempsocks));
cache_clean_deferred(serv);
diff .prev/net/sunrpc/svcsock.c ./net/sunrpc/svcsock.c
--- .prev/net/sunrpc/svcsock.c 2007-03-06 11:12:40.000000000 +1100
+++ ./net/sunrpc/svcsock.c 2007-03-06 12:36:20.000000000 +1100
@@ -82,6 +82,7 @@ static void svc_delete_socket(struct sv
static void svc_udp_data_ready(struct sock *, int);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
+static void svc_close_socket(struct svc_sock *svsk);
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_sock *svsk);
static int svc_deferred_recv(struct svc_rqst *rqstp);
@@ -1787,7 +1788,7 @@ svc_delete_socket(struct svc_sock *svsk)
spin_unlock_bh(&serv->sv_lock);
}
-void svc_close_socket(struct svc_sock *svsk)
+static void svc_close_socket(struct svc_sock *svsk)
{
set_bit(SK_CLOSE, &svsk->sk_flags);
if (test_and_set_bit(SK_BUSY, &svsk->sk_flags))
@@ -1800,6 +1801,19 @@ void svc_close_socket(struct svc_sock *s
svc_sock_put(svsk);
}
+void svc_force_close_socket(struct svc_sock *svsk)
+{
+ set_bit(SK_CLOSE, &svsk->sk_flags);
+ if (test_bit(SK_BUSY, &svsk->sk_flags)) {
+ /* Waiting to be processed, but no threads left,
+ * So just remove it from the waiting list
+ */
+ list_del_init(&svsk->sk_ready);
+ clear_bit(SK_BUSY, &svsk->sk_flags);
+ }
+ svc_close_socket(svsk);
+}
+
/**
* svc_makesock - Make a socket for nfsd and lockd
* @serv: RPC server structure
^ permalink raw reply [flat|nested] 12+ messages in thread
* [PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
2007-03-06 2:15 ` NeilBrown
@ 2007-03-06 2:15 ` NeilBrown
-1 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
From: Greg Banks <gnb@sgi.com>
Provide a module param "pool_mode" for sunrpc.ko which allows a
sysadmin to choose the mode for mapping NFS thread service pools
to CPUs. Values are:
auto choose a mapping mode heuristically
global (default, same as the pre-2.6.19 code) a single global pool
percpu one pool per CPU
pernode one pool per NUMA node
Note that since 2.6.19 the hardcoded behaviour has been "auto";
this patch makes the default "global".
The pool mode can be changed after boot/modprobe using /sys, if the
NFS and lockd services have been shut down. A useful side effect of
this change is to fix a small memory leak when unloading the module.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./Documentation/kernel-parameters.txt | 16 ++++
./net/sunrpc/svc.c | 133 +++++++++++++++++++++++++++++-----
2 files changed, 130 insertions(+), 19 deletions(-)
diff .prev/Documentation/kernel-parameters.txt ./Documentation/kernel-parameters.txt
--- .prev/Documentation/kernel-parameters.txt 2007-03-06 12:45:18.000000000 +1100
+++ ./Documentation/kernel-parameters.txt 2007-03-06 12:45:23.000000000 +1100
@@ -1692,6 +1692,22 @@ and is between 256 and 4096 characters.
stifb= [HW]
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
+ sunrpc.pool_mode=
+ [NFS]
+ Control how the NFS server code allocates CPUs to
+ service thread pools. Depending on how many NICs
+ you have and where their interrupts are bound, this
+ option will affect which CPUs will do NFS serving.
+ Note: this parameter cannot be changed while the
+ NFS server is running.
+
+ auto the server chooses an appropriate mode
+ automatically using heuristics
+ global a single global pool contains all CPUs
+ percpu one pool for each CPU
+ pernode one pool for each NUMA node (equivalent
+ to global on non-NUMA machines)
+
swiotlb= [IA-64] Number of I/O TLB slabs
switches= [HW,M68k]
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 12:27:12.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 12:45:44.000000000 +1100
@@ -27,22 +27,26 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
+#define svc_serv_is_pooled(serv) ((serv)->sv_function)
+
/*
* Mode for mapping cpus to pools.
*/
enum {
- SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */
+ SVC_POOL_AUTO = -1, /* choose one of the others */
SVC_POOL_GLOBAL, /* no mapping, just a single global pool
* (legacy & UP mode) */
SVC_POOL_PERCPU, /* one pool per cpu */
SVC_POOL_PERNODE /* one pool per numa node */
};
+#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
static struct svc_pool_map {
+ int count; /* How many svc_servs use us */
int mode; /* Note: int not enum to avoid
* warnings about "enumeration value
* not handled in switch" */
@@ -50,9 +54,63 @@ static struct svc_pool_map {
unsigned int *pool_to; /* maps pool id to cpu or node */
unsigned int *to_pool; /* maps cpu or node to pool id */
} svc_pool_map = {
- .mode = SVC_POOL_NONE
+ .count = 0,
+ .mode = SVC_POOL_DEFAULT
};
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
+ int err;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ err = -EBUSY;
+ if (m->count)
+ goto out;
+
+ err = 0;
+ if (!strncmp(val, "auto", 4))
+ *ip = SVC_POOL_AUTO;
+ else if (!strncmp(val, "global", 6))
+ *ip = SVC_POOL_GLOBAL;
+ else if (!strncmp(val, "percpu", 6))
+ *ip = SVC_POOL_PERCPU;
+ else if (!strncmp(val, "pernode", 7))
+ *ip = SVC_POOL_PERNODE;
+ else
+ err = -EINVAL;
+
+out:
+ mutex_unlock(&svc_pool_map_mutex);
+ return err;
+}
+
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+
+ switch (*ip)
+ {
+ case SVC_POOL_AUTO:
+ return strlcpy(buf, "auto", 20);
+ case SVC_POOL_GLOBAL:
+ return strlcpy(buf, "global", 20);
+ case SVC_POOL_PERCPU:
+ return strlcpy(buf, "percpu", 20);
+ case SVC_POOL_PERNODE:
+ return strlcpy(buf, "pernode", 20);
+ default:
+ return sprintf(buf, "%d", *ip);
+ }
+}
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ &svc_pool_map.mode, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -166,18 +224,25 @@ svc_pool_map_init_pernode(struct svc_poo
/*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa). Initialise the map if we're the first user.
+ * Returns the number of pools.
*/
static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
int npools = -1;
- if (m->mode != SVC_POOL_NONE)
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (m->count++) {
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
+ }
- m->mode = svc_pool_map_choose_mode();
+ if (m->mode == SVC_POOL_AUTO)
+ m->mode = svc_pool_map_choose_mode();
switch (m->mode) {
case SVC_POOL_PERCPU:
@@ -195,9 +260,36 @@ svc_pool_map_init(void)
}
m->npools = npools;
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
}
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (!--m->count) {
+ m->mode = SVC_POOL_DEFAULT;
+ kfree(m->to_pool);
+ kfree(m->pool_to);
+ m->npools = 0;
+ }
+
+ mutex_unlock(&svc_pool_map_mutex);
+}
+
+
/*
* Set the current thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -212,10 +304,9 @@ svc_pool_map_set_cpumask(unsigned int pi
/*
* The caller checks for sv_nrpools > 1, which
- * implies that we've been initialized and the
- * map mode is not NONE.
+ * implies that we've been initialized.
*/
- BUG_ON(m->mode == SVC_POOL_NONE);
+ BUG_ON(m->count == 0);
switch (m->mode)
{
@@ -246,18 +337,19 @@ svc_pool_for_cpu(struct svc_serv *serv,
unsigned int pidx = 0;
/*
- * SVC_POOL_NONE happens in a pure client when
+ * An uninitialised map happens in a pure client when
* lockd is brought up, so silently treat it the
* same as SVC_POOL_GLOBAL.
*/
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
+ if (svc_serv_is_pooled(serv)) {
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
+ }
}
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -347,7 +439,7 @@ svc_create_pooled(struct svc_program *pr
svc_thread_fn func, int sig, struct module *mod)
{
struct svc_serv *serv;
- unsigned int npools = svc_pool_map_init();
+ unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, shutdown);
@@ -397,6 +489,9 @@ svc_destroy(struct svc_serv *serv)
cache_clean_deferred(serv);
+ if (svc_serv_is_pooled(serv))
+ svc_pool_map_put();
+
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
kfree(serv->sv_pools);
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 12+ messages in thread* [PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
@ 2007-03-06 2:15 ` NeilBrown
0 siblings, 0 replies; 12+ messages in thread
From: NeilBrown @ 2007-03-06 2:15 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel
From: Greg Banks <gnb@sgi.com>
Provide a module param "pool_mode" for sunrpc.ko which allows a
sysadmin to choose the mode for mapping NFS thread service pools
to CPUs. Values are:
auto choose a mapping mode heuristically
global (default, same as the pre-2.6.19 code) a single global pool
percpu one pool per CPU
pernode one pool per NUMA node
Note that since 2.6.19 the hardcoded behaviour has been "auto";
this patch makes the default "global".
The pool mode can be changed after boot/modprobe using /sys, if the
NFS and lockd services have been shut down. A useful side effect of
this change is to fix a small memory leak when unloading the module.
Signed-off-by: Greg Banks <gnb@melbourne.sgi.com>
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./Documentation/kernel-parameters.txt | 16 ++++
./net/sunrpc/svc.c | 133 +++++++++++++++++++++++++++++-----
2 files changed, 130 insertions(+), 19 deletions(-)
diff .prev/Documentation/kernel-parameters.txt ./Documentation/kernel-parameters.txt
--- .prev/Documentation/kernel-parameters.txt 2007-03-06 12:45:18.000000000 +1100
+++ ./Documentation/kernel-parameters.txt 2007-03-06 12:45:23.000000000 +1100
@@ -1692,6 +1692,22 @@ and is between 256 and 4096 characters.
stifb= [HW]
Format: bpp:<bpp1>[:<bpp2>[:<bpp3>...]]
+ sunrpc.pool_mode=
+ [NFS]
+ Control how the NFS server code allocates CPUs to
+ service thread pools. Depending on how many NICs
+ you have and where their interrupts are bound, this
+ option will affect which CPUs will do NFS serving.
+ Note: this parameter cannot be changed while the
+ NFS server is running.
+
+ auto the server chooses an appropriate mode
+ automatically using heuristics
+ global a single global pool contains all CPUs
+ percpu one pool for each CPU
+ pernode one pool for each NUMA node (equivalent
+ to global on non-NUMA machines)
+
swiotlb= [IA-64] Number of I/O TLB slabs
switches= [HW,M68k]
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 12:27:12.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 12:45:44.000000000 +1100
@@ -27,22 +27,26 @@
#define RPCDBG_FACILITY RPCDBG_SVCDSP
+#define svc_serv_is_pooled(serv) ((serv)->sv_function)
+
/*
* Mode for mapping cpus to pools.
*/
enum {
- SVC_POOL_NONE = -1, /* uninitialised, choose one of the others */
+ SVC_POOL_AUTO = -1, /* choose one of the others */
SVC_POOL_GLOBAL, /* no mapping, just a single global pool
* (legacy & UP mode) */
SVC_POOL_PERCPU, /* one pool per cpu */
SVC_POOL_PERNODE /* one pool per numa node */
};
+#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
/*
* Structure for mapping cpus to pools and vice versa.
* Setup once during sunrpc initialisation.
*/
static struct svc_pool_map {
+ int count; /* How many svc_servs use us */
int mode; /* Note: int not enum to avoid
* warnings about "enumeration value
* not handled in switch" */
@@ -50,9 +54,63 @@ static struct svc_pool_map {
unsigned int *pool_to; /* maps pool id to cpu or node */
unsigned int *to_pool; /* maps cpu or node to pool id */
} svc_pool_map = {
- .mode = SVC_POOL_NONE
+ .count = 0,
+ .mode = SVC_POOL_DEFAULT
};
+static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
+
+static int
+param_set_pool_mode(const char *val, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+ struct svc_pool_map *m = &svc_pool_map;
+ int err;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ err = -EBUSY;
+ if (m->count)
+ goto out;
+
+ err = 0;
+ if (!strncmp(val, "auto", 4))
+ *ip = SVC_POOL_AUTO;
+ else if (!strncmp(val, "global", 6))
+ *ip = SVC_POOL_GLOBAL;
+ else if (!strncmp(val, "percpu", 6))
+ *ip = SVC_POOL_PERCPU;
+ else if (!strncmp(val, "pernode", 7))
+ *ip = SVC_POOL_PERNODE;
+ else
+ err = -EINVAL;
+
+out:
+ mutex_unlock(&svc_pool_map_mutex);
+ return err;
+}
+
+static int
+param_get_pool_mode(char *buf, struct kernel_param *kp)
+{
+ int *ip = (int *)kp->arg;
+
+ switch (*ip)
+ {
+ case SVC_POOL_AUTO:
+ return strlcpy(buf, "auto", 20);
+ case SVC_POOL_GLOBAL:
+ return strlcpy(buf, "global", 20);
+ case SVC_POOL_PERCPU:
+ return strlcpy(buf, "percpu", 20);
+ case SVC_POOL_PERNODE:
+ return strlcpy(buf, "pernode", 20);
+ default:
+ return sprintf(buf, "%d", *ip);
+ }
+}
+module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
+ &svc_pool_map.mode, 0644);
/*
* Detect best pool mapping mode heuristically,
@@ -166,18 +224,25 @@ svc_pool_map_init_pernode(struct svc_poo
/*
- * Build the global map of cpus to pools and vice versa.
+ * Add a reference to the global map of cpus to pools (and
+ * vice versa). Initialise the map if we're the first user.
+ * Returns the number of pools.
*/
static unsigned int
-svc_pool_map_init(void)
+svc_pool_map_get(void)
{
struct svc_pool_map *m = &svc_pool_map;
int npools = -1;
- if (m->mode != SVC_POOL_NONE)
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (m->count++) {
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
+ }
- m->mode = svc_pool_map_choose_mode();
+ if (m->mode == SVC_POOL_AUTO)
+ m->mode = svc_pool_map_choose_mode();
switch (m->mode) {
case SVC_POOL_PERCPU:
@@ -195,9 +260,36 @@ svc_pool_map_init(void)
}
m->npools = npools;
+ mutex_unlock(&svc_pool_map_mutex);
return m->npools;
}
+
+/*
+ * Drop a reference to the global map of cpus to pools.
+ * When the last reference is dropped, the map data is
+ * freed; this allows the sysadmin to change the pool
+ * mode using the pool_mode module option without
+ * rebooting or re-loading sunrpc.ko.
+ */
+static void
+svc_pool_map_put(void)
+{
+ struct svc_pool_map *m = &svc_pool_map;
+
+ mutex_lock(&svc_pool_map_mutex);
+
+ if (!--m->count) {
+ m->mode = SVC_POOL_DEFAULT;
+ kfree(m->to_pool);
+ kfree(m->pool_to);
+ m->npools = 0;
+ }
+
+ mutex_unlock(&svc_pool_map_mutex);
+}
+
+
/*
* Set the current thread's cpus_allowed mask so that it
* will only run on cpus in the given pool.
@@ -212,10 +304,9 @@ svc_pool_map_set_cpumask(unsigned int pi
/*
* The caller checks for sv_nrpools > 1, which
- * implies that we've been initialized and the
- * map mode is not NONE.
+ * implies that we've been initialized.
*/
- BUG_ON(m->mode == SVC_POOL_NONE);
+ BUG_ON(m->count == 0);
switch (m->mode)
{
@@ -246,18 +337,19 @@ svc_pool_for_cpu(struct svc_serv *serv,
unsigned int pidx = 0;
/*
- * SVC_POOL_NONE happens in a pure client when
+ * An uninitialised map happens in a pure client when
* lockd is brought up, so silently treat it the
* same as SVC_POOL_GLOBAL.
*/
-
- switch (m->mode) {
- case SVC_POOL_PERCPU:
- pidx = m->to_pool[cpu];
- break;
- case SVC_POOL_PERNODE:
- pidx = m->to_pool[cpu_to_node(cpu)];
- break;
+ if (svc_serv_is_pooled(serv)) {
+ switch (m->mode) {
+ case SVC_POOL_PERCPU:
+ pidx = m->to_pool[cpu];
+ break;
+ case SVC_POOL_PERNODE:
+ pidx = m->to_pool[cpu_to_node(cpu)];
+ break;
+ }
}
return &serv->sv_pools[pidx % serv->sv_nrpools];
}
@@ -347,7 +439,7 @@ svc_create_pooled(struct svc_program *pr
svc_thread_fn func, int sig, struct module *mod)
{
struct svc_serv *serv;
- unsigned int npools = svc_pool_map_init();
+ unsigned int npools = svc_pool_map_get();
serv = __svc_create(prog, bufsize, npools, shutdown);
@@ -397,6 +489,9 @@ svc_destroy(struct svc_serv *serv)
cache_clean_deferred(serv);
+ if (svc_serv_is_pooled(serv))
+ svc_pool_map_put();
+
/* Unregister service with the portmapper */
svc_register(serv, 0, 0);
kfree(serv->sv_pools);
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
2007-03-06 2:15 ` NeilBrown
(?)
@ 2007-03-06 4:57 ` Andrew Morton
2007-03-06 5:11 ` Neil Brown
-1 siblings, 1 reply; 12+ messages in thread
From: Andrew Morton @ 2007-03-06 4:57 UTC (permalink / raw)
To: NeilBrown; +Cc: nfs, linux-kernel
On Tue, 6 Mar 2007 13:15:20 +1100 NeilBrown <neilb@suse.de> wrote:
> Provide a module param "pool_mode" for sunrpc.ko which allows a
> sysadmin to choose the mode for mapping NFS thread service pools
> to CPUs. Values are:
>
> auto choose a mapping mode heuristically
> global (default, same as the pre-2.6.19 code) a single global pool
> percpu one pool per CPU
> pernode one pool per NUMA node
>
> Note that since 2.6.19 the hardcoded behaviour has been "auto",
> this patch makes the default "global".
>
> The pool mode can be changed after boot/modprobe using /sys, if the
> NFS and lockd services have been shut down. A useful side effect of
> this change is to fix a small memory leak when unloading the module.
Mutter. Is this really suitable and needed for 2.6.21 at this stage in
its life?
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
2007-03-06 4:57 ` Andrew Morton
@ 2007-03-06 5:11 ` Neil Brown
0 siblings, 0 replies; 12+ messages in thread
From: Neil Brown @ 2007-03-06 5:11 UTC (permalink / raw)
To: Andrew Morton; +Cc: nfs, linux-kernel, Greg Banks
On Monday March 5, akpm@linux-foundation.org wrote:
> On Tue, 6 Mar 2007 13:15:20 +1100 NeilBrown <neilb@suse.de> wrote:
>
> > Provide a module param "pool_mode" for sunrpc.ko which allows a
> > sysadmin to choose the mode for mapping NFS thread service pools
> > to CPUs. Values are:
> >
> > auto choose a mapping mode heuristically
> > global (default, same as the pre-2.6.19 code) a single global pool
> > percpu one pool per CPU
> > pernode one pool per NUMA node
> >
> > Note that since 2.6.19 the hardcoded behaviour has been "auto",
> > this patch makes the default "global".
> >
> > The pool mode can be changed after boot/modprobe using /sys, if the
> > NFS and lockd services have been shut down. A useful side effect of
> > this change is to fix a small memory leak when unloading the module.
>
> Mutter. Is this really suitable and needed for 2.6.21 at this stage in
> its life?
Something is definitely needed. Currently on a 4-way SMP machine,
nfsd might only use 1 CPU (depends a bit on irq routing I think).
If that patch is too big, maybe this one?
NeilBrown
--------------------------------
Avoid using nfsd process pools on SMP machines.
From: Neil Brown <neilb@suse.de>
process-pools have real benefits for NUMA, but on SMP
machines they only work if network interface interrupts
go to all CPUs (via round-robin or multiple nics). This is
not always the case, so disable the pools in this case until
a better solution is developed.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./net/sunrpc/svc.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 16:07:19.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 16:08:53.000000000 +1100
@@ -79,7 +79,11 @@ svc_pool_map_choose_mode(void)
* x86_64 kernel on Xeons. In this case we
* want to divide the pools on cpu boundaries.
*/
- return SVC_POOL_PERCPU;
+ /* actually, unless your IRQs round-robin nicely,
+ * this turns out to be really bad, so just
+ * go GLOBAL for now until a better fix can be developed
+ */
+ return SVC_POOL_GLOBAL;
}
/* default: one global pool */
-------------------------------------------------------------------------
Take Surveys. Earn Cash. Influence the Future of IT
Join SourceForge.net's Techsay panel and you'll get the chance to share your
opinions on IT & business topics through brief surveys-and earn cash
http://www.techsay.com/default.php?page=join.php&p=sourceforge&CID=DEVDEV
_______________________________________________
NFS maillist - NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs
^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [PATCH 004 of 4] knfsd: Provide sunrpc pool_mode module option
@ 2007-03-06 5:11 ` Neil Brown
0 siblings, 0 replies; 12+ messages in thread
From: Neil Brown @ 2007-03-06 5:11 UTC (permalink / raw)
To: Andrew Morton; +Cc: Greg Banks, nfs, linux-kernel
On Monday March 5, akpm@linux-foundation.org wrote:
> On Tue, 6 Mar 2007 13:15:20 +1100 NeilBrown <neilb@suse.de> wrote:
>
> > Provide a module param "pool_mode" for sunrpc.ko which allows a
> > sysadmin to choose the mode for mapping NFS thread service pools
> > to CPUs. Values are:
> >
> > auto choose a mapping mode heuristically
> > global (default, same as the pre-2.6.19 code) a single global pool
> > percpu one pool per CPU
> > pernode one pool per NUMA node
> >
> > Note that since 2.6.19 the hardcoded behaviour has been "auto",
> > this patch makes the default "global".
> >
> > The pool mode can be changed after boot/modprobe using /sys, if the
> > NFS and lockd services have been shut down. A useful side effect of
> > this change is to fix a small memory leak when unloading the module.
>
> Mutter. Is this really suitable and needed for 2.6.21 at this stage in
> its life?
Something is definitely needed. Currently on a 4-way SMP machine,
nfsd might only use 1 CPU (depends a bit on irq routing I think).
If that patch is too big, maybe this one?
NeilBrown
--------------------------------
Avoid using nfsd process pools on SMP machines.
From: Neil Brown <neilb@suse.de>
process-pools have real benefits for NUMA, but on SMP
machines they only work if network interface interrupts
go to all CPUs (via round-robin or multiple nics). This is
not always the case, so disable the pools in this case until
a better solution is developed.
Signed-off-by: Neil Brown <neilb@suse.de>
### Diffstat output
./net/sunrpc/svc.c | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff .prev/net/sunrpc/svc.c ./net/sunrpc/svc.c
--- .prev/net/sunrpc/svc.c 2007-03-06 16:07:19.000000000 +1100
+++ ./net/sunrpc/svc.c 2007-03-06 16:08:53.000000000 +1100
@@ -79,7 +79,11 @@ svc_pool_map_choose_mode(void)
* x86_64 kernel on Xeons. In this case we
* want to divide the pools on cpu boundaries.
*/
- return SVC_POOL_PERCPU;
+ /* actually, unless your IRQs round-robin nicely,
+ * this turns out to be really bad, so just
+ * go GLOBAL for now until a better fix can be developed
+ */
+ return SVC_POOL_GLOBAL;
}
/* default: one global pool */
^ permalink raw reply [flat|nested] 12+ messages in thread