qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
* [Qemu-devel] PATCH: enabling TCP keepalives - v3
@ 2009-04-30 19:40 David Ahern
  2009-05-01 11:32 ` Richard W.M. Jones
  2009-05-01 14:43 ` Anthony Liguori
  0 siblings, 2 replies; 19+ messages in thread
From: David Ahern @ 2009-04-30 19:40 UTC (permalink / raw)
  To: qemu-devel

[-- Attachment #1: Type: text/plain, Size: 488 bytes --]

Did not see a response to the last version.

This patch enables TCP keepalives on VNC connections and TCP-based char
devices.

Default parameters have keep alive probes sent after 60-seconds of idle
time. Probes are sent every 12 seconds with the connection resetting
after 5 failed probes (ie., connection is closed if no response received
in 60-seconds).

Changes v2 -> v3
Removed duplicate definition and fixed typo in comments.

Signed-off-by: David Ahern <dsahern@gmail.com>


david

[-- Attachment #2: enable-tcp-keepalives-v3.patch --]
[-- Type: text/plain, Size: 4658 bytes --]

diff --git a/configure b/configure
index 82fb60a..41ba86a 100755
--- a/configure
+++ b/configure
@@ -1673,6 +1673,37 @@ if test "$fdt" = "yes" ; then
   echo "FDT_LIBS=-lfdt" >> $config_mak
 fi
 
+cat > $TMPC << EOF
+#include <sys/socket.h>
+int main(void) {
+  int val = 1;
+  if (setsockopt(0, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) < 0)
+    return 1;
+  return 0;
+}
+EOF
+if $cc $ARCH_CFLAGS -o $TMPE $TMPC >/dev/null 2> /dev/null ; then
+  echo "#define HAVE_SO_KEEPALIVE 1" >> $config_h
+fi
+cat > $TMPC << EOF
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <netinet/in.h>
+int main(void) {
+  int val = 1;
+  if (setsockopt(0, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0)
+    return 1;
+  if (setsockopt(0, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0)
+    return 1;
+  if (setsockopt(0, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0)
+    return 1;
+  return 0;
+}
+EOF
+if $cc $ARCH_CFLAGS -o $TMPE $TMPC >/dev/null 2> /dev/null ; then
+  echo "#define HAVE_TCP_KEEPXXXX 1" >> $config_h
+fi
+
 # XXX: suppress that
 if [ "$bsd" = "yes" ] ; then
   echo "#define O_LARGEFILE 0" >> $config_h
diff --git a/net.c b/net.c
index 7ae1e6d..bb85889 100644
--- a/net.c
+++ b/net.c
@@ -2209,3 +2209,36 @@ void net_client_check(void)
                     vlan->id);
     }
 }
+
+int enable_tcp_keepalive(int sd, int keepidle, int keepintvl, int keepcnt)
+{
+#ifdef HAVE_SO_KEEPALIVE
+    int val = 1;
+
+    if (setsockopt(sd, SOL_SOCKET, SO_KEEPALIVE, &val, sizeof(val)) < 0) {
+		return -1;
+    }
+
+#ifdef HAVE_TCP_KEEPXXXX
+    val = keepidle;
+    if ((val > 0) &&
+	    (setsockopt(sd, IPPROTO_TCP, TCP_KEEPIDLE, &val, sizeof(val)) < 0)) {
+        fprintf(stderr, "failed to set tcp idle interval on fd %d\n", sd);
+    }
+
+    val = keepintvl;
+    if ((val > 0) &&
+        (setsockopt(sd, IPPROTO_TCP, TCP_KEEPINTVL, &val, sizeof(val)) < 0)) {
+        fprintf(stderr, "failed to set tcp probe interval on fd %d\n", sd);
+    }
+
+    val = keepcnt;
+    if ((val > 0) &&
+        (setsockopt(sd, IPPROTO_TCP, TCP_KEEPCNT, &val, sizeof(val)) < 0)) {
+        fprintf(stderr, "failed to set tcp missed probe count on fd %d\n", sd);
+    }
+#endif
+#endif
+
+    return 0;
+}
diff --git a/net.h b/net.h
index cdf63a4..c7dbadf 100644
--- a/net.h
+++ b/net.h
@@ -119,6 +119,8 @@ void net_client_check(void);
 void net_host_device_add(Monitor *mon, const char *device, const char *opts);
 void net_host_device_remove(Monitor *mon, int vlan_id, const char *device);
 
+int enable_tcp_keepalive(int sd, int keepidle, int keepintvl, int keepcnt);
+
 #define DEFAULT_NETWORK_SCRIPT "/etc/qemu-ifup"
 #define DEFAULT_NETWORK_DOWN_SCRIPT "/etc/qemu-ifdown"
 #ifdef __sun__
diff --git a/qemu-char.c b/qemu-char.c
index 664cbfd..e1411c4 100644
--- a/qemu-char.c
+++ b/qemu-char.c
@@ -103,6 +103,14 @@
 
 #include "qemu_socket.h"
 
+/* timers for TCP keepalives: start probes after CHAR_TCP_KEEPIDLE
+ * seconds of no activity, send probes every CHAR_TCP_KEEPINTVL seconds,
+ * close connection after CHAR_TCP_KEEPCNT failed probes
+ */
+#define CHAR_TCP_KEEPIDLE  60
+#define CHAR_TCP_KEEPINTVL 12
+#define CHAR_TCP_KEEPCNT    5
+
 /***********************************************************/
 /* character device */
 
@@ -1990,6 +1998,10 @@ static void tcp_chr_accept(void *opaque)
         if (fd < 0 && errno != EINTR) {
             return;
         } else if (fd >= 0) {
+            if (enable_tcp_keepalive(fd, CHAR_TCP_KEEPIDLE,
+                                  CHAR_TCP_KEEPINTVL, CHAR_TCP_KEEPCNT) != 0) {
+                fprintf(stderr, "VNC: failed to enable TCP keep alives\n");
+            }
             if (s->do_telnetopt)
                 tcp_chr_telnet_init(fd);
             break;
diff --git a/vnc.c b/vnc.c
index ab1f044..c42cba9 100644
--- a/vnc.c
+++ b/vnc.c
@@ -32,6 +32,14 @@
 
 #define VNC_REFRESH_INTERVAL (1000 / 30)
 
+/* timers for TCP keepalives: start probes after VNC_TCP_KEEPIDLE
+ * seconds of no activity, send probes every VNC_TCP_KEEPINTVL seconds,
+ * close connection after VNC_TCP_KEEPCNT failed probes
+ */
+#define VNC_TCP_KEEPIDLE  60
+#define VNC_TCP_KEEPINTVL 12
+#define VNC_TCP_KEEPCNT    5
+
 #include "vnc_keysym.h"
 #include "d3des.h"
 
@@ -2021,6 +2029,11 @@ static void vnc_listen_read(void *opaque)
 
     int csock = accept(vs->lsock, (struct sockaddr *)&addr, &addrlen);
     if (csock != -1) {
+        if (enable_tcp_keepalive(csock, VNC_TCP_KEEPIDLE, 
+                                 VNC_TCP_KEEPINTVL, VNC_TCP_KEEPCNT) != 0) {
+            fprintf(stderr, "VNC: failed to enable TCP keep alives\n");
+        }
+
         vnc_connect(vs, csock);
     }
 }

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-04-30 19:40 [Qemu-devel] PATCH: enabling TCP keepalives - v3 David Ahern
@ 2009-05-01 11:32 ` Richard W.M. Jones
  2009-05-01 12:23   ` Jamie Lokier
  2009-05-01 12:49   ` David Ahern
  2009-05-01 14:43 ` Anthony Liguori
  1 sibling, 2 replies; 19+ messages in thread
From: Richard W.M. Jones @ 2009-05-01 11:32 UTC (permalink / raw)
  To: David Ahern; +Cc: qemu-devel

On Thu, Apr 30, 2009 at 01:40:42PM -0600, David Ahern wrote:
> Did not see a response to the last version.
> 
> This patch enables TCP keepalives on VNC connections and TCP-based char
> devices.
> 
> Default parameters have keep alive probes sent after 60-seconds of idle
> time. Probes are sent every 12 seconds with the connection resetting
> after 5 failed probes (ie., connection is closed if no response received
> in 60-seconds).

IMHO this should be optional, and firmly default to _OFF_.  Brief
network outages shouldn't result in connections failing all over the
place.  In addition, does this negatively impact migration?

Rich.

-- 
Richard Jones, Emerging Technologies, Red Hat  http://et.redhat.com/~rjones
New in Fedora 11: Fedora Windows cross-compiler. Compile Windows
programs, test, and build Windows installers. Over 70 libraries supprt'd
http://fedoraproject.org/wiki/MinGW http://www.annexia.org/fedora_mingw

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 11:32 ` Richard W.M. Jones
@ 2009-05-01 12:23   ` Jamie Lokier
  2009-05-01 12:49   ` David Ahern
  1 sibling, 0 replies; 19+ messages in thread
From: Jamie Lokier @ 2009-05-01 12:23 UTC (permalink / raw)
  To: Richard W.M. Jones; +Cc: qemu-devel, David Ahern

Richard W.M. Jones wrote:
> On Thu, Apr 30, 2009 at 01:40:42PM -0600, David Ahern wrote:
> > Did not see a response to the last version.
> > 
> > This patch enables TCP keepalives on VNC connections and TCP-based char
> > devices.
> > 
> > Default parameters have keep alive probes sent after 60-seconds of idle
> > time. Probes are sent every 12 seconds with the connection resetting
> > after 5 failed probes (ie., connection is closed if no response received
> > in 60-seconds).
> 
> IMHO this should be optional, and firmly default to _OFF_.  Brief
> network outages shouldn't result in connections failing all over the
> place.  In addition, does this negatively impact migration?

Note that if either side tries to send actual data, such as due to a
mouse movement or screen update, then connections will fail after a
TCP timeout anyway.  There's no getting away from a network outage
tripping it, unless the session is completely idle - static screen, no
input.

Keepalive just causes the connections to drop when there was no other
traffic to detect the connection being down.  It's being added because
without it, QEMU accumulates connections when the network is really is
gone - they never go away.

-- Jamie

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 11:32 ` Richard W.M. Jones
  2009-05-01 12:23   ` Jamie Lokier
@ 2009-05-01 12:49   ` David Ahern
  2009-05-01 15:23     ` Daniel P. Berrange
  1 sibling, 1 reply; 19+ messages in thread
From: David Ahern @ 2009-05-01 12:49 UTC (permalink / raw)
  To: Richard W.M. Jones; +Cc: qemu-devel



Richard W.M. Jones wrote:
> On Thu, Apr 30, 2009 at 01:40:42PM -0600, David Ahern wrote:
>> Did not see a response to the last version.
>>
>> This patch enables TCP keepalives on VNC connections and TCP-based char
>> devices.
>>
>> Default parameters have keep alive probes sent after 60-seconds of idle
>> time. Probes are sent every 12 seconds with the connection resetting
>> after 5 failed probes (ie., connection is closed if no response received
>> in 60-seconds).
> 
> IMHO this should be optional, and firmly default to _OFF_.  Brief
> network outages shouldn't result in connections failing all over the
> place.  In addition, does this negatively impact migration?

It's not a matter of connections failing; it's a matter of cleaning them
up for a variety of reasons. Besides the VPN example which motivated
this patch (i.e, VPN connection drops and when re-established you get a
differnt IP), there are a lot of networks with very aggressive firewalls
(e.g., 60-minute timers). Without some sort of keepalive mechanisms
those firewalls will close the holes and the connections will hang.

I'll take a look at adding yet another command line option to enable
this. sshd for example, does not specify individual timer and count
values, only on/off. So for char devices, how about something like:

-serial tcp:<ip>:<port>[,server][,nowait][,tcpkeep]

-vnc display[,tcpkeep]

If timer and counters are to be configurable, I could do something like
tcpkeep=i,j,k, where i is the idle time, j is the interval for sending
probes and k is the count of missed probes.

I have not run, and not setup to run, migration tests. Will migrations
work as expected if the network were to stall for 2 minutes? The current
patch would only drop the connection after 2 minutes of no response.

david

> 
> Rich.
> 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-04-30 19:40 [Qemu-devel] PATCH: enabling TCP keepalives - v3 David Ahern
  2009-05-01 11:32 ` Richard W.M. Jones
@ 2009-05-01 14:43 ` Anthony Liguori
  2009-05-01 14:47   ` David Ahern
  1 sibling, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2009-05-01 14:43 UTC (permalink / raw)
  To: David Ahern; +Cc: qemu-devel

David Ahern wrote:
> Did not see a response to the last version.
>
> This patch enables TCP keepalives on VNC connections and TCP-based char
> devices.
>
> Default parameters have keep alive probes sent after 60-seconds of idle
> time. Probes are sent every 12 seconds with the connection resetting
> after 5 failed probes (ie., connection is closed if no response received
> in 60-seconds).
>
> Changes v2 -> v3
> Removed duplicate definition and fixed typo in comments.
>
> Signed-off-by: David Ahern <dsahern@gmail.com>
>
>
> david
>   
This patch introduces a warning:

/home/anthony/git/qemu/vnc.c:2039: warning: implicit declaration of 
function ‘enable_tcp_keepalive’

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 14:43 ` Anthony Liguori
@ 2009-05-01 14:47   ` David Ahern
  2009-05-01 14:51     ` Anthony Liguori
  0 siblings, 1 reply; 19+ messages in thread
From: David Ahern @ 2009-05-01 14:47 UTC (permalink / raw)
  To: Anthony Liguori; +Cc: qemu-devel



Anthony Liguori wrote:
> David Ahern wrote:
>> Did not see a response to the last version.
>>
>> This patch enables TCP keepalives on VNC connections and TCP-based char
>> devices.
>>
>> Default parameters have keep alive probes sent after 60-seconds of idle
>> time. Probes are sent every 12 seconds with the connection resetting
>> after 5 failed probes (ie., connection is closed if no response received
>> in 60-seconds).
>>
>> Changes v2 -> v3
>> Removed duplicate definition and fixed typo in comments.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>>
>>
>> david
>>   
> This patch introduces a warning:
> 
> /home/anthony/git/qemu/vnc.c:2039: warning: implicit declaration of
> function ‘enable_tcp_keepalive’
> 

Missed that. Forgot to add net.h to vnc.c.

Before I re-send a patch, what's the feeling regarding enabling this all
the time versus a command line option to control it?

david

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 14:47   ` David Ahern
@ 2009-05-01 14:51     ` Anthony Liguori
  2009-05-01 15:16       ` Paul Brook
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2009-05-01 14:51 UTC (permalink / raw)
  To: David Ahern; +Cc: qemu-devel

David Ahern wrote:
> Missed that. Forgot to add net.h to vnc.c.
>
> Before I re-send a patch, what's the feeling regarding enabling this all
> the time versus a command line option to control it?
>
> david
>   

I'm on the fence.  It's not something I think is extremely common and it 
seems like we're going to great lengths to fix up one persons broken 
configuration.

However, it really doesn't do any harm and I can understand the argument 
for it.  So far, I haven't seen anyone raise an issue with why this 
would not be a reasonable thing to do.

FWIW, in my opinion, it's an all or nothing thing.  It's not a knob I 
think we ought to be exposing.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 14:51     ` Anthony Liguori
@ 2009-05-01 15:16       ` Paul Brook
  2009-05-01 15:57         ` Anthony Liguori
  0 siblings, 1 reply; 19+ messages in thread
From: Paul Brook @ 2009-05-01 15:16 UTC (permalink / raw)
  To: qemu-devel; +Cc: Anthony Liguori, David Ahern

On Friday 01 May 2009, Anthony Liguori wrote:
> David Ahern wrote:
> > Missed that. Forgot to add net.h to vnc.c.
> >
> > Before I re-send a patch, what's the feeling regarding enabling this all
> > the time versus a command line option to control it?
> >
> > david
>
> I'm on the fence.  It's not something I think is extremely common and it
> seems like we're going to great lengths to fix up one persons broken
> configuration.

I'm wary of enabling this by default. As someone else mentioned, having your 
TCP console connections spontaneously combust after 60 seconds can be 
somewhat surprising.

An option seems reasonable enough. We already have this for nodelay.

Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 12:49   ` David Ahern
@ 2009-05-01 15:23     ` Daniel P. Berrange
  2009-05-01 15:47       ` David Ahern
  2009-05-01 15:52       ` Avi Kivity
  0 siblings, 2 replies; 19+ messages in thread
From: Daniel P. Berrange @ 2009-05-01 15:23 UTC (permalink / raw)
  To: David Ahern; +Cc: Richard W.M. Jones, qemu-devel

On Fri, May 01, 2009 at 06:49:33AM -0600, David Ahern wrote:
> 
> 
> Richard W.M. Jones wrote:
> > On Thu, Apr 30, 2009 at 01:40:42PM -0600, David Ahern wrote:
> >> Did not see a response to the last version.
> >>
> >> This patch enables TCP keepalives on VNC connections and TCP-based char
> >> devices.
> >>
> >> Default parameters have keep alive probes sent after 60-seconds of idle
> >> time. Probes are sent every 12 seconds with the connection resetting
> >> after 5 failed probes (ie., connection is closed if no response received
> >> in 60-seconds).
> > 
> > IMHO this should be optional, and firmly default to _OFF_.  Brief
> > network outages shouldn't result in connections failing all over the
> > place.  In addition, does this negatively impact migration?
> 
> It's not a matter of connections failing; it's a matter of cleaning them
> up for a variety of reasons. Besides the VPN example which motivated
> this patch (i.e, VPN connection drops and when re-established you get a
> differnt IP), there are a lot of networks with very aggressive firewalls
> (e.g., 60-minute timers). Without some sort of keepalive mechanisms
> those firewalls will close the holes and the connections will hang.

You don't neccessarily always get a different IP for VPN connections,
as administrators may well choose to give users a fixed IP for their
VPN client. I'm not entirely against keepalives, but I thing making
it drop the connection after a mere 60 seconds is way too quick, if this
is enabled by default. I'd be more inclined to just have it use the
kernel defaults for timeouts

Daniel
-- 
|: Red Hat, Engineering, London   -o-   http://people.redhat.com/berrange/ :|
|: http://libvirt.org  -o-  http://virt-manager.org  -o-  http://ovirt.org :|
|: http://autobuild.org       -o-         http://search.cpan.org/~danberr/ :|
|: GnuPG: 7D3B9505  -o-  F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :|

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:23     ` Daniel P. Berrange
@ 2009-05-01 15:47       ` David Ahern
  2009-05-01 17:21         ` Richard W.M. Jones
  2009-05-05  1:31         ` Jamie Lokier
  2009-05-01 15:52       ` Avi Kivity
  1 sibling, 2 replies; 19+ messages in thread
From: David Ahern @ 2009-05-01 15:47 UTC (permalink / raw)
  To: Daniel P. Berrange, Paul Brook; +Cc: Richard W.M. Jones, qemu-devel



Daniel P. Berrange wrote:
> On Fri, May 01, 2009 at 06:49:33AM -0600, David Ahern wrote:
>>
>> Richard W.M. Jones wrote:
>>> On Thu, Apr 30, 2009 at 01:40:42PM -0600, David Ahern wrote:
>>>> Did not see a response to the last version.
>>>>
>>>> This patch enables TCP keepalives on VNC connections and TCP-based char
>>>> devices.
>>>>
>>>> Default parameters have keep alive probes sent after 60-seconds of idle
>>>> time. Probes are sent every 12 seconds with the connection resetting
>>>> after 5 failed probes (ie., connection is closed if no response received
>>>> in 60-seconds).
>>> IMHO this should be optional, and firmly default to _OFF_.  Brief
>>> network outages shouldn't result in connections failing all over the
>>> place.  In addition, does this negatively impact migration?
>> It's not a matter of connections failing; it's a matter of cleaning them
>> up for a variety of reasons. Besides the VPN example which motivated
>> this patch (i.e, VPN connection drops and when re-established you get a
>> differnt IP), there are a lot of networks with very aggressive firewalls
>> (e.g., 60-minute timers). Without some sort of keepalive mechanisms
>> those firewalls will close the holes and the connections will hang.
> 
> You don't neccessarily always get a different IP for VPN connections,
> as administrators may well choose to give users a fixed IP for their
> VPN client. I'm not entirely against keepalives, but I thing making

Agreed, you don't always get a different IP on reconnects, but in my
case you do. Also, VPN users have no control over that; they just
see/cause dead connections.

> it drop the connection after a mere 60 seconds is way too quick, if this
> is enabled by default. I'd be more inclined to just have it use the
> kernel defaults for timeouts
> 
> Daniel

The parameters I put in cause a drop after 2 minutes of no response --
60 seconds of idle (no data through the socket) followed by 60 seconds
of failed probes. The default parameters for linux are harsh: 7 hours of
idle time before the first keepalive is sent.

Per an earlier email, I can add an option: tcpkeep=i,j,k where i is the
idle time, j is the interval for sending probes and k is the count of
missed probes, but I think this is getting to be overkill.

I'd prefer to have the dead sockets cleaned up, so I'll take enabling
keepalives with default parameters if that causes least resistance. With
qemu now supporting multiple VNC sessions, at least that offers an
option for recovery on that path.

david

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:23     ` Daniel P. Berrange
  2009-05-01 15:47       ` David Ahern
@ 2009-05-01 15:52       ` Avi Kivity
  2009-05-01 16:11         ` John Haxby
  1 sibling, 1 reply; 19+ messages in thread
From: Avi Kivity @ 2009-05-01 15:52 UTC (permalink / raw)
  To: Daniel P. Berrange; +Cc: qemu-devel, Richard W.M. Jones, David Ahern

Daniel P. Berrange wrote:
> You don't neccessarily always get a different IP for VPN connections,
> as administrators may well choose to give users a fixed IP for their
> VPN client. I'm not entirely against keepalives, but I thing making
> it drop the connection after a mere 60 seconds is way too quick, if this
> is enabled by default. I'd be more inclined to just have it use the
> kernel defaults for timeouts
>   

That's around two hours.

I understand the wariness when it comes to dropping connections, but vnc 
is a reconnectable protocol; it isn't like you lose any data.  If the 
connection drops for two minutes it is useless anyway.

-- 
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:16       ` Paul Brook
@ 2009-05-01 15:57         ` Anthony Liguori
  2009-05-01 16:04           ` Paul Brook
  0 siblings, 1 reply; 19+ messages in thread
From: Anthony Liguori @ 2009-05-01 15:57 UTC (permalink / raw)
  To: Paul Brook; +Cc: qemu-devel, David Ahern

Paul Brook wrote:
> On Friday 01 May 2009, Anthony Liguori wrote:
>   
>> David Ahern wrote:
>>     
>>> Missed that. Forgot to add net.h to vnc.c.
>>>
>>> Before I re-send a patch, what's the feeling regarding enabling this all
>>> the time versus a command line option to control it?
>>>
>>> david
>>>       
>> I'm on the fence.  It's not something I think is extremely common and it
>> seems like we're going to great lengths to fix up one persons broken
>> configuration.
>>     
>
> I'm wary of enabling this by default. As someone else mentioned, having your 
> TCP console connections spontaneously combust after 60 seconds can be 
> somewhat surprising.
>
> An option seems reasonable enough. We already have this for nodelay.
>   

I don't think an option is very useful.  If you lose your connection and 
now have a bunch of orphan connections, it's too late to have the 
foresight to have used an option on startup.  In that case, it would be 
much better to just be able to close existing connections.

I think there's a very, very small number of people that would have the 
foresight to always use keepalive=60 or whatever the option would be.

-- 
Regards,

Anthony Liguori

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:57         ` Anthony Liguori
@ 2009-05-01 16:04           ` Paul Brook
  2009-05-01 16:11             ` David Ahern
  0 siblings, 1 reply; 19+ messages in thread
From: Paul Brook @ 2009-05-01 16:04 UTC (permalink / raw)
  To: qemu-devel; +Cc: Anthony Liguori, David Ahern

> I don't think an option is very useful.  If you lose your connection and
> now have a bunch of orphan connections, it's too late to have the
> foresight to have used an option on startup.  In that case, it would be
> much better to just be able to close existing connections.
>
> I think there's a very, very small number of people that would have the
> foresight to always use keepalive=60 or whatever the option would be.

In that case we should do nothing. Users with a flakey net connection can 
tweak their kernel (via /proc/sys) to use lower timeouts. AFAICS There's no 
way qemu can know what a "better" value is. As a concrete example my ADSL 
connection takes between 90 and 180 seconds to resync after a noise spike, so 
60 seconds is absolutely the wrong timeout value here.

Paul

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:52       ` Avi Kivity
@ 2009-05-01 16:11         ` John Haxby
  2009-05-05  1:35           ` Jamie Lokier
  0 siblings, 1 reply; 19+ messages in thread
From: John Haxby @ 2009-05-01 16:11 UTC (permalink / raw)
  To: Avi Kivity; +Cc: David Ahern, qemu-devel, Richard W.M. Jones

Avi Kivity wrote:
> Daniel P. Berrange wrote:
>> You don't neccessarily always get a different IP for VPN connections,
>> as administrators may well choose to give users a fixed IP for their
>> VPN client. I'm not entirely against keepalives, but I thing making
>> it drop the connection after a mere 60 seconds is way too quick, if this
>> is enabled by default. I'd be more inclined to just have it use the
>> kernel defaults for timeouts
>>   
>
> That's around two hours.
>
> I understand the wariness when it comes to dropping connections, but 
> vnc is a reconnectable protocol; it isn't like you lose any data.  If 
> the connection drops for two minutes it is useless anyway.
>
Two hours is typically too long and 60 seconds is overly aggressive.  
Connection tracking devices often have a 10 minute timeout for idle 
connections -- the connection will magically evaporate after 600s of 
idle time.

In my experience, VPN connections usually last hours if there's a 
keepalive of some sort keeping them going.  It doesn't matter what the 
keepalive is, just so long as there's _some_ traffic keeping it ticking 
over.  Usually it's enough to set the default keepalive time (sysctl -w 
net.ipv4.tcp_keepalive_time=540, for example) -- in some cases 
keepalives don't make it through the network and you need some sort of 
application ping, but that's comparatively unusual.

 From a Linux perspective, I'd be inclined to just enable keepalives on 
the connection and let the user set the default keepalive interval if 
it's needed.

For those people that have seriously dodgy VPN connections that no 
amount of keepalive will keep up, they need some alternative.   Probably 
starting with a new VPN :-)

jch

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 16:04           ` Paul Brook
@ 2009-05-01 16:11             ` David Ahern
  0 siblings, 0 replies; 19+ messages in thread
From: David Ahern @ 2009-05-01 16:11 UTC (permalink / raw)
  To: Paul Brook; +Cc: Anthony Liguori, qemu-devel



Paul Brook wrote:
>> I don't think an option is very useful.  If you lose your connection and
>> now have a bunch of orphan connections, it's too late to have the
>> foresight to have used an option on startup.  In that case, it would be
>> much better to just be able to close existing connections.
>>
>> I think there's a very, very small number of people that would have the
>> foresight to always use keepalive=60 or whatever the option would be.
> 
> In that case we should do nothing. Users with a flakey net connection can 
> tweak their kernel (via /proc/sys) to use lower timeouts. AFAICS There's no 
> way qemu can know what a "better" value is. As a concrete example my ADSL 
> connection takes between 90 and 180 seconds to resync after a noise spike, so 
> 60 seconds is absolutely the wrong timeout value here.
> 
> Paul

ie., just enable keepalives and use OS defaults? The linux default is
7200 seconds to start sending probes (which is 2 hours as Avi noted, not
7 hours as I misstated).

I'll send an updated patch.

david

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:47       ` David Ahern
@ 2009-05-01 17:21         ` Richard W.M. Jones
  2009-05-05  1:31         ` Jamie Lokier
  1 sibling, 0 replies; 19+ messages in thread
From: Richard W.M. Jones @ 2009-05-01 17:21 UTC (permalink / raw)
  To: David Ahern; +Cc: Paul Brook, qemu-devel

On Fri, May 01, 2009 at 09:47:05AM -0600, David Ahern wrote:
> Per an earlier email, I can add an option: tcpkeep=i,j,k where i is the
> idle time, j is the interval for sending probes and k is the count of
> missed probes, but I think this is getting to be overkill.

Sounds to me like a good idea to make it optional and configurable.

Rich.

-- 
Richard Jones, Emerging Technologies, Red Hat  http://et.redhat.com/~rjones
virt-df lists disk usage of guests without needing to install any
software inside the virtual machine.  Supports Linux and Windows.
http://et.redhat.com/~rjones/virt-df/

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 15:47       ` David Ahern
  2009-05-01 17:21         ` Richard W.M. Jones
@ 2009-05-05  1:31         ` Jamie Lokier
  2009-05-05  2:59           ` David Ahern
  1 sibling, 1 reply; 19+ messages in thread
From: Jamie Lokier @ 2009-05-05  1:31 UTC (permalink / raw)
  To: David Ahern; +Cc: qemu-devel, Paul Brook, Richard W.M. Jones

David Ahern wrote:
> > You don't neccessarily always get a different IP for VPN connections,
> > as administrators may well choose to give users a fixed IP for their
> > VPN client. I'm not entirely against keepalives, but I thing making
> 
> Agreed, you don't always get a different IP on reconnects, but in my
> case you do. Also, VPN users have no control over that; they just
> see/cause dead connections.

Sometimes I use a VPN to keep connections going when the underlying
network changes.

E.g. when suspend the laptop at work, taking it home, resuming, my
personal VPN means some connections (such as SSH sessions) are not
broken despite a short commute with the laptop off, and waking up on a
different network :-)

This is also handy when switching between a house network and a mobile
3G network, or between wireless networks.  SSH sessions continue
working because of the stable VPN on top.

For this I don't care about the encryption aspect so much as the VPN's
ability to maintain a stable IP despite the underlying network
changing.

So it does get used that way.

> The parameters I put in cause a drop after 2 minutes of no response --
> 60 seconds of idle (no data through the socket) followed by 60 seconds
> of failed probes. The default parameters for linux are harsh: 7 hours of
> idle time before the first keepalive is sent.

Is 7 hours a problem worth overriding the kernel defaults for?  How
many old VNC sessions are likely to get accumulated in that time?

2 minutes is a bit fast for a truly idle session, but as I said in
another response, if you have data sent by either end, then the
session will be broken by the lack of response in about 2 minutes
anyway - without keepalives.

-- Jamie

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-01 16:11         ` John Haxby
@ 2009-05-05  1:35           ` Jamie Lokier
  0 siblings, 0 replies; 19+ messages in thread
From: Jamie Lokier @ 2009-05-05  1:35 UTC (permalink / raw)
  To: John Haxby; +Cc: qemu-devel, Richard W.M. Jones, Avi Kivity, David Ahern

John Haxby wrote:
> Connection tracking devices often have a 10 minute timeout for idle 
> connections -- the connection will magically evaporate after 600s of 
> idle time.

Oh if only.  I've seen NATs which expire after about 20 seconds.  My
VPN is configured to send NAT-keepalive pings every 15 seconds because
of that.

> For those people that have seriously dodgy VPN connections that no 
> amount of keepalive will keep up, they need some alternative.   Probably 
> starting with a new VPN :-)

Interesting perspective on VPNs.  I use a VPN sometimes because it's
more stable than the underlying network and keeps TCP connections
going where the underlying network would not :-)

-- Jamie

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [Qemu-devel] PATCH: enabling TCP keepalives - v3
  2009-05-05  1:31         ` Jamie Lokier
@ 2009-05-05  2:59           ` David Ahern
  0 siblings, 0 replies; 19+ messages in thread
From: David Ahern @ 2009-05-05  2:59 UTC (permalink / raw)
  To: Jamie Lokier; +Cc: qemu-devel, Paul Brook, Richard W.M. Jones



Jamie Lokier wrote:
>> The parameters I put in cause a drop after 2 minutes of no response --
>> 60 seconds of idle (no data through the socket) followed by 60 seconds
>> of failed probes. The default parameters for linux are harsh: 7 hours of
>> idle time before the first keepalive is sent.
> 
> Is 7 hours a problem worth overriding the kernel defaults for?  How
> many old VNC sessions are likely to get accumulated in that time?

The linux default is 7200 seconds; I'm not sure how I manage to convert
7200 seconds to 7 hours.

> 
> 2 minutes is a bit fast for a truly idle session, but as I said in
> another response, if you have data sent by either end, then the
> session will be broken by the lack of response in about 2 minutes
> anyway - without keepalives.


Version 5 of the patch drops the changes to the timers and the missed
probes count; it only enables tcp keepalives and the probes are sent
based on OS defaults.

david

> 
> -- Jamie

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2009-05-05  2:59 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-04-30 19:40 [Qemu-devel] PATCH: enabling TCP keepalives - v3 David Ahern
2009-05-01 11:32 ` Richard W.M. Jones
2009-05-01 12:23   ` Jamie Lokier
2009-05-01 12:49   ` David Ahern
2009-05-01 15:23     ` Daniel P. Berrange
2009-05-01 15:47       ` David Ahern
2009-05-01 17:21         ` Richard W.M. Jones
2009-05-05  1:31         ` Jamie Lokier
2009-05-05  2:59           ` David Ahern
2009-05-01 15:52       ` Avi Kivity
2009-05-01 16:11         ` John Haxby
2009-05-05  1:35           ` Jamie Lokier
2009-05-01 14:43 ` Anthony Liguori
2009-05-01 14:47   ` David Ahern
2009-05-01 14:51     ` Anthony Liguori
2009-05-01 15:16       ` Paul Brook
2009-05-01 15:57         ` Anthony Liguori
2009-05-01 16:04           ` Paul Brook
2009-05-01 16:11             ` David Ahern

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).