All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] Fix xprt_bindresvport range
@ 2005-02-23 16:28 Olaf Kirch
  2005-02-23 16:35 ` Trond Myklebust
  0 siblings, 1 reply; 13+ messages in thread
From: Olaf Kirch @ 2005-02-23 16:28 UTC (permalink / raw)
  To: nfs

xprt_bindresvport would grab ports in the range 1-800, which can
conflict with all sorts of network services. What we really want
to do is select from a range N - 1023.

The patch below changes xprt_bindresvport to select ports from
650-1023 by default (631 is cups, which we better avoid).
It also adds sysctls to allow the admin to select a different
port range.

Signed-off-by: Olaf Kirch <okir@suse.de>

Index: linux-2.6.10/include/linux/sunrpc/debug.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/debug.h	2004-10-18 23:55:36.000000000 +0200
+++ linux-2.6.10/include/linux/sunrpc/debug.h	2005-02-23 17:18:55.000000000 +0100
@@ -94,6 +94,8 @@ enum {
 	CTL_NLMDEBUG,
 	CTL_SLOTTABLE_UDP,
 	CTL_SLOTTABLE_TCP,
+	CTL_MIN_RESVPORT,
+	CTL_MAX_RESVPORT,
 };
 
 #endif /* _LINUX_SUNRPC_DEBUG_H_ */
Index: linux-2.6.10/include/linux/sunrpc/xprt.h
===================================================================
--- linux-2.6.10.orig/include/linux/sunrpc/xprt.h	2005-02-23 17:17:55.000000000 +0100
+++ linux-2.6.10/include/linux/sunrpc/xprt.h	2005-02-23 17:19:21.000000000 +0100
@@ -224,6 +224,9 @@ void			xprt_sock_setbufsize(struct rpc_x
 					(test_and_clear_bit(XPRT_CONNECT, &(xp)->sockstate))
 #define xprt_clear_connected(xp)	(clear_bit(XPRT_CONNECT, &(xp)->sockstate))
 
+extern unsigned int	xprt_min_resvport;
+extern unsigned int	xprt_max_resvport;
+
 #endif /* __KERNEL__*/
 
 #endif /* _LINUX_SUNRPC_XPRT_H */
Index: linux-2.6.10/net/sunrpc/sysctl.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/sysctl.c	2005-02-23 17:17:55.000000000 +0100
+++ linux-2.6.10/net/sunrpc/sysctl.c	2005-02-23 17:19:35.000000000 +0100
@@ -29,6 +29,10 @@ unsigned int	nfs_debug;
 unsigned int	nfsd_debug;
 unsigned int	nlm_debug;
 
+unsigned int	xprt_min_resvport = 650;
+unsigned int	xprt_max_resvport = 1023;
+
+
 #ifdef RPC_DEBUG
 
 static struct ctl_table_header *sunrpc_table_header;
@@ -121,6 +125,8 @@ done:
 
 static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
 static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int xprt_min_resvport_limit = 1;
+static unsigned int xprt_max_resvport_limit = 65535;
 
 static ctl_table debug_table[] = {
 	{
@@ -156,6 +162,28 @@ static ctl_table debug_table[] = {
 		.proc_handler	= &proc_dodebug
 	}, 
 	{
+		.ctl_name	= CTL_MIN_RESVPORT,
+		.procname	= "min_resvport",
+		.data		= &xprt_min_resvport,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xprt_min_resvport_limit,
+		.extra2		= &xprt_max_resvport_limit
+	},
+	{
+		.ctl_name	= CTL_MAX_RESVPORT,
+		.procname	= "max_resvport",
+		.data		= &xprt_max_resvport,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.strategy	= &sysctl_intvec,
+		.extra1		= &xprt_min_resvport_limit,
+		.extra2		= &xprt_max_resvport_limit
+	},
+	{
 		.ctl_name	= CTL_SLOTTABLE_UDP,
 		.procname	= "udp_slot_table_entries",
 		.data		= &xprt_udp_slot_table_entries,
Index: linux-2.6.10/net/sunrpc/xprt.c
===================================================================
--- linux-2.6.10.orig/net/sunrpc/xprt.c	2005-02-23 17:17:55.000000000 +0100
+++ linux-2.6.10/net/sunrpc/xprt.c	2005-02-23 17:18:08.000000000 +0100
@@ -75,7 +75,6 @@
 
 #define XPRT_MAX_BACKOFF	(8)
 #define XPRT_IDLE_TIMEOUT	(5*60*HZ)
-#define XPRT_MAX_RESVPORT	(800)
 
 /*
  * Local functions
@@ -1492,7 +1491,7 @@ xprt_setup(int proto, struct sockaddr_in
 	xprt->timer.function = xprt_init_autodisconnect;
 	xprt->timer.data = (unsigned long) xprt;
 	xprt->last_used = jiffies;
-	xprt->port = XPRT_MAX_RESVPORT;
+	xprt->port = xprt_max_resvport;
 
 	/* Set timeout parameters */
 	if (to) {
@@ -1540,8 +1539,10 @@ static inline int xprt_bindresvport(stru
 			xprt->port = port;
 			return 0;
 		}
-		if (--port == 0)
-			port = XPRT_MAX_RESVPORT;
+		if (port <= xprt_min_resvport)
+			port = xprt_max_resvport;
+		else
+			port--;
 	} while (err == -EADDRINUSE && port != xprt->port);
 
 	printk("RPC: Can't bind to reserved port (%d).\n", -err);
-- 
Olaf Kirch   |  --- o --- Nous sommes du soleil we love when we play
okir@suse.de |    / | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax


-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 13+ messages in thread
* RE: [PATCH] Fix xprt_bindresvport range
@ 2005-02-23 17:05 Lever, Charles
  0 siblings, 0 replies; 13+ messages in thread
From: Lever, Charles @ 2005-02-23 17:05 UTC (permalink / raw)
  To: Trond Myklebust, Olaf Kirch; +Cc: nfs

> on den 23.02.2005 Klokka 17:28 (+0100) skreiv Olaf Kirch:
> > xprt_bindresvport would grab ports in the range 1-800, which can
> > conflict with all sorts of network services. What we really want
> > to do is select from a range N - 1023.
> >=20
> > The patch below changes xprt_bindresvport to select ports from
> > 650-1023 by default (631 is cups, which we better avoid).

note some mainboard's with built-in ethernet and IPMI implementations
quietly use port 623; incoming UDP traffic on 623 appears to be blocked
to the O/S.  so 650 is a good choice.

> > It also adds sysctls to allow the admin to select a different
> > port range.
>=20
> The patch looks good, and this is definitely a feature we should have.
>=20
> Chuck, how will this fit in with your transport switch work?

it fits fine.  i can add it to my transport switch patchset, or you can
take it now and i will work the transport switch around it.

one problem with sysctls and the transport switch is i'm not sure how to
add new sysctls dynamically when a transport module is loaded.  so far
i've just hacked around it (for the slot table size sysctl).  i'm sure
olaf's is the first of many patches that will add another sysctl
parameter to the RPC client.  any ideas?


-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 13+ messages in thread
* RE: [PATCH] Fix xprt_bindresvport range
@ 2005-02-28 16:41 Lever, Charles
  2005-02-28 16:57 ` Olaf Kirch
  0 siblings, 1 reply; 13+ messages in thread
From: Lever, Charles @ 2005-02-28 16:41 UTC (permalink / raw)
  To: Olaf Kirch, Trond Myklebust; +Cc: Ian Kent, Charles Lever, nfs

hi olaf-

> I understand the theoretical reasons, but I wonder about the=20
> practical value. Any problem that causes retransmits over TCP=20
> is non-transient, otherwise TCP retransmit would simply fix=20
> it for us (question - there doesn't seem to be any code in=20
> sunrpc that breaks the connection after a timeout, major or=20
> minor: I think we want that)

this is actually required for NFSv4.

the new RPC client transport switch patches implement this feature.
after a major timeout, the FSM invokes a transport switch callout.  for
TCP sockets, this callout will invoke xprt_disconnect.  we may want to
go farther and actually close the socket at that point instead of just
marking it as a zombie, just to speed the reconnect process along.

Sun's RPC/TCP implementation allows two retransmits over a TCP transport
before closing it.  by setting retrans=3D2 and timeo=3D600, and =
including
this "disconnect on timeout" feature, we model the reference
implementation's behavior.

> To make matters worse, the Linux client is fairly=20
> conservative on TCP reconnects.  TCP connects will time out=20
> after 60 seconds, and if the first reconnect fails, we'll=20
> delay future reconnects by another 15 seconds.
> In general I believe this is A Good Thing, but this timing=20
> pattern will make sure there's not a shred of valid data left=20
> in your retransmit cache by the time the connection comes back...

the new RPC client transport switch patches also implement an
exponential backoff when retrying to connect a TCP transport socket,
replacing the current fixed 15 second timeout.  it starts (currently) at
3 seconds and caps at 60 seconds.

by adding in features such as SO_REUSEADDR and limiting the linger
timeout on TCP connections, it's more reasonable to expect that the DRC
will be effective during a TCP timeout.

> And this is not theoretical; I've seen this happen in our R&D network.
> The client/nfsd thread ratio was too high, so that nfsd=20
> started to drop connections frequently.  That caused=20
> interesting failures with silly renames that were real fun to debug :)

with NFSv4.1 sessions, the DRC size is capped.  by using a sequence
number, sessions allow the server to retire cache entries sanely after
an RPC is completed on the client, so this problem gets a lot easier.


-------------------------------------------------------
SF email is sponsored by - The IT Product Guide
Read honest & candid reviews on hundreds of IT Products from real users.
Discover which products truly live up to the hype. Start reading now.
http://ads.osdn.com/?ad_id=6595&alloc_id=14396&op=click
_______________________________________________
NFS maillist  -  NFS@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nfs

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2005-02-28 16:57 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-02-23 16:28 [PATCH] Fix xprt_bindresvport range Olaf Kirch
2005-02-23 16:35 ` Trond Myklebust
2005-02-24  2:59   ` Ian Kent
2005-02-24  4:23     ` Trond Myklebust
2005-02-24 10:14       ` Olaf Kirch
2005-02-24 18:12         ` Trond Myklebust
2005-02-25  9:28           ` Olaf Kirch
2005-02-25 17:30             ` Trond Myklebust
2005-02-28 11:33               ` Olaf Kirch
2005-02-25  0:54       ` Ian Kent
  -- strict thread matches above, loose matches on Subject: below --
2005-02-23 17:05 Lever, Charles
2005-02-28 16:41 Lever, Charles
2005-02-28 16:57 ` Olaf Kirch

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.