* [PATCH] NET: Add TCP connection abort IOCTL
@ 2007-03-27 21:47 David Griego
2007-03-27 22:30 ` David Miller
0 siblings, 1 reply; 19+ messages in thread
From: David Griego @ 2007-03-27 21:47 UTC (permalink / raw)
To: davem; +Cc: netdev
Adds an IOCTL for aborting established TCP connections, and is
designed to be an HA performance improvement for cleaning up, failure
notification, and application termination.
Signed-off-by: David Griego <dagriego@gmail.com>
---
include/linux/ipv6.h | 8 ++++
include/linux/socket.h | 5 ++
include/linux/sockios.h | 1
include/net/inet_hashtables.h | 6 +++
net/ipv4/tcp.c | 15 +++++++
net/ipv4/tcp_ipv4.c | 86 +++++++++++++++++++++++++++++++++++++++++
6 files changed, 121 insertions(+), 0 deletions(-)
---
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f824113..42f6765 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -467,6 +467,14 @@ #define INET6_MATCH(__sk, __hash, __sadd
ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
+#define TCP_IPV6_WILDCARD_MATCH(__sk, __saddr, __sport,__daddr, __dport) \
+ ((ipv6_addr_any(&__saddr) || \
+ ipv6_addr_equal(&__saddr, &inet6_sk(__sk)->saddr)) && \
+ ((!__sport) || (ntohs(__sport) == inet_sk(__sk)->num)) && \
+ (ipv6_addr_any(&__daddr) || \
+ ipv6_addr_equal(&__daddr, &inet6_sk(__sk)->daddr)) && \
+ ((!__dport) || (__dport == inet_sk(__sk)->dport)))
+
#endif /* __KERNEL__ */
#endif /* _IPV6_H */
diff --git a/include/linux/socket.h b/include/linux/socket.h
index fcd35a2..0bf7b0a 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -48,6 +48,11 @@ struct linger {
#define sockaddr_storage __kernel_sockaddr_storage
+struct tcp_abort_sockaddr_storage {
+ struct sockaddr_storage local; /* local address for lookup */
+ struct sockaddr_storage remote; /* Remote address for lookup */
+};
+
/*
* As we do 4.4BSD message passing we use a 4.4BSD message passing
* system, not 4.3. Thus msg_accrights(len) are now missing. They
diff --git a/include/linux/sockios.h b/include/linux/sockios.h
index abef759..b850577 100644
--- a/include/linux/sockios.h
+++ b/include/linux/sockios.h
@@ -140,4 +140,5 @@ #define SIOCDEVPRIVATE 0x89F0 /* to 89FF
*/
#define SIOCPROTOPRIVATE 0x89E0 /* to 89EF */
+#define SIOCABORTCONN SIOCPROTOPRIVATE + 1
#endif /* _LINUX_SOCKIOS_H */
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index d27ee8c..735739a 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -339,6 +339,12 @@ #define INET_TW_MATCH(__sk, __hash,__coo
(!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif))))
#endif /* 64-bit arch */
+#define TCP_IPV4_WILDCARD_MATCH(__sk, __saddr, __sport,__daddr, __dport) \
+ (((__saddr == INADDR_ANY) || (__saddr == inet_sk(__sk)->saddr)) && \
+ ((!__sport) || (ntohs(__sport) == inet_sk(__sk)->num)) && \
+ ((__daddr == INADDR_ANY) || (__daddr == inet_sk(__sk)->daddr)) && \
+ ((!__dport) || (__dport == inet_sk(__sk)->dport)))
+
/*
* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so we need
* not check it for lookups anymore, thanks Alexey. -DaveM
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 3834b10..b3e7a6f 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -442,6 +442,21 @@ int tcp_ioctl(struct sock *sk, int cmd,
else
answ = tp->write_seq - tp->snd_una;
break;
+ case SIOCABORTCONN:
+ {
+ struct tcp_abort_sockaddr_storage tcp_abort;
+ void __user *data = (void __user *)arg;
+
+ if (!capable(CAP_NET_ADMIN)){
+ return -EACCES;
+ }
+
+ /* Check permissions */
+ if (copy_from_user(&tcp_abort, data, sizeof(tcp_abort))) {
+ return -EFAULT;
+ }
+ return tcp_handle_abort_req(&tcp_abort.local, &tcp_abort.remote);
+ }
default:
return -ENOIOCTLCMD;
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0ba74bb..add483b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -123,6 +123,91 @@ void tcp_unhash(struct sock *sk)
inet_unhash(&tcp_hashinfo, sk);
}
+int tcp_handle_abort_req(struct sockaddr_storage *local,
+ struct sockaddr_storage *remote)
+{
+ int i, ret = -ENOENT;
+
+ /* Check for supported address families */
+ if((local->ss_family != remote->ss_family) ||
+ ((local->ss_family != AF_INET) && (local->ss_family != AF_INET6)))
+ return -EINVAL;
+
+ local_bh_disable();
+
+ /* cycle through all the established connecton buckets */
+ for (i = 0; i < tcp_hashinfo.ehash_size; ++i) {
+ struct sock *sk;
+ struct hlist_node *node;
+restart_bucket:
+
+ read_lock(&tcp_hashinfo.ehash[i].lock);
+ sk_for_each(sk, node, &tcp_hashinfo.ehash[i].chain) {
+ if (sk->sk_family != local->ss_family) {
+ continue;
+ }
+
+ switch(local->ss_family) {
+ case AF_INET:
+ {
+ struct sockaddr_in *ipv4_local =
+ (struct sockaddr_in *)local;
+ struct sockaddr_in *ipv4_remote =
+ (struct sockaddr_in *)remote;
+
+ if(TCP_IPV4_WILDCARD_MATCH(sk,
+ ipv4_local->sin_addr.s_addr,
+ ipv4_local->sin_port,
+ ipv4_remote->sin_addr.s_addr,
+ ipv4_remote->sin_port))
+ break;
+ continue;
+ }
+#ifdef CONFIG_IPV6
+ case AF_INET6:
+ {
+ struct sockaddr_in6 *ipv6_local =
+ (struct sockaddr_in6 *)local;
+ struct sockaddr_in6 *ipv6_remote =
+ (struct sockaddr_in6 *)remote;
+
+ if(TCP_IPV6_WILDCARD_MATCH(sk,
+ ipv6_local->sin6_addr,
+ ipv6_local->sin6_port,
+ ipv6_remote->sin6_addr,
+ ipv6_remote->sin6_port))
+ break;
+ continue;
+ }
+#endif /*CONFIG_IPV6 */
+ default: /* Not a supported address family */
+ continue;
+ }
+
+ /* Found a match so kill it */
+ ret = 0;
+ sock_hold(sk);
+ read_unlock(&tcp_hashinfo.ehash[i].lock);
+ local_bh_enable();
+ tcp_disconnect(sk, O_NONBLOCK);
+ local_bh_disable();
+ sock_put(sk);
+
+ /* Broke link and let go of the list lock, so restart
+ * our search for matches at the begining of this
+ * hash bucket
+ */
+ goto restart_bucket;
+ }
+
+ read_unlock(&tcp_hashinfo.ehash[i].lock);
+ }
+
+ local_bh_enable();
+
+ return ret;
+}
+
static inline __u32 tcp_v4_init_sequence(struct sk_buff *skb)
{
return secure_tcp_sequence_number(skb->nh.iph->daddr,
@@ -2462,6 +2547,7 @@ EXPORT_SYMBOL(ipv4_specific);
EXPORT_SYMBOL(tcp_hashinfo);
EXPORT_SYMBOL(tcp_prot);
EXPORT_SYMBOL(tcp_unhash);
+EXPORT_SYMBOL(tcp_handle_abort_req);
EXPORT_SYMBOL(tcp_v4_conn_request);
EXPORT_SYMBOL(tcp_v4_connect);
EXPORT_SYMBOL(tcp_v4_do_rcv);
--
1.4.1
^ permalink raw reply related [flat|nested] 19+ messages in thread* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 21:47 [PATCH] NET: Add TCP connection abort IOCTL David Griego
@ 2007-03-27 22:30 ` David Miller
2007-03-27 23:09 ` Mark Huth
` (2 more replies)
0 siblings, 3 replies; 19+ messages in thread
From: David Miller @ 2007-03-27 22:30 UTC (permalink / raw)
To: dagriego; +Cc: davem, netdev
From: dagriego@gmail.com (David Griego)
Date: Tue, 27 Mar 2007 14:47:54 -0700
> Adds an IOCTL for aborting established TCP connections, and is
> designed to be an HA performance improvement for cleaning up, failure
> notification, and application termination.
>
> Signed-off-by: David Griego <dagriego@gmail.com>
SO_LINGER with a zero linger time plus close() isn't working
properly?
There is no reason for this ioctl at all. Either existing
facilities provide what you need or what you want is a
protocol violation we can't do.
^ permalink raw reply [flat|nested] 19+ messages in thread* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 22:30 ` David Miller
@ 2007-03-27 23:09 ` Mark Huth
2007-03-27 23:36 ` David Miller
2007-03-28 0:27 ` John Heffner
2007-03-28 0:04 ` Rick Jones
2007-03-29 14:56 ` Predrag Hodoba
2 siblings, 2 replies; 19+ messages in thread
From: Mark Huth @ 2007-03-27 23:09 UTC (permalink / raw)
To: David Miller; +Cc: dagriego, davem, netdev
David Miller wrote:
> From: dagriego@gmail.com (David Griego)
> Date: Tue, 27 Mar 2007 14:47:54 -0700
>
>
>> Adds an IOCTL for aborting established TCP connections, and is
>> designed to be an HA performance improvement for cleaning up, failure
>> notification, and application termination.
>>
>> Signed-off-by: David Griego <dagriego@gmail.com>
>>
>
> SO_LINGER with a zero linger time plus close() isn't working
> properly?
>
> There is no reason for this ioctl at all. Either existing
> facilities provide what you need or what you want is a
> protocol violation we can't do.
>
Actually, there are legitimate uses for this sort of API. The patch
allows an administrator to kill specific connections that are in use by
other applications, where the close is not available, since the socket
is owned by another process. Say one of your large applications has
hundreds or even thousands of open connections and you have determined
that a particular connection is causing trouble. This API allows the
admin to kill that particular connection, and doesn't appear to violate
any RFC offhand, since an abort is sent to the peer.
One may argue that the applications should be modified, but that is not
always possible in the case of various ISVs. As Linux gains market
share in the large server market, more and more applications are being
ported from other platforms that have this sort of
management/administrative interfaces.
Mark Huth
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 23:09 ` Mark Huth
@ 2007-03-27 23:36 ` David Miller
2007-03-28 6:02 ` Eric Dumazet
2007-03-28 0:27 ` John Heffner
1 sibling, 1 reply; 19+ messages in thread
From: David Miller @ 2007-03-27 23:36 UTC (permalink / raw)
To: mhuth; +Cc: dagriego, davem, netdev
From: Mark Huth <mhuth@mvista.com>
Date: Tue, 27 Mar 2007 16:09:30 -0700
> Actually, there are legitimate uses for this sort of API. The patch
> allows an administrator to kill specific connections that are in use by
> other applications, where the close is not available, since the socket
> is owned by another process.
Anything that wants to act as an external agent to manipulate
or terminate connections should use netfilter.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 23:36 ` David Miller
@ 2007-03-28 6:02 ` Eric Dumazet
2007-03-28 6:35 ` David Miller
0 siblings, 1 reply; 19+ messages in thread
From: Eric Dumazet @ 2007-03-28 6:02 UTC (permalink / raw)
To: David Miller; +Cc: mhuth, dagriego, davem, netdev
David Miller a écrit :
> From: Mark Huth <mhuth@mvista.com>
> Date: Tue, 27 Mar 2007 16:09:30 -0700
>
>> Actually, there are legitimate uses for this sort of API. The patch
>> allows an administrator to kill specific connections that are in use by
>> other applications, where the close is not available, since the socket
>> is owned by another process.
>
> Anything that wants to act as an external agent to manipulate
> or terminate connections should use netfilter.
This is what I thought too at the begining.
But after some thinking I recalled having to reboot machines just because
netfilter was not in (because of noticeable performance hit), and I could find
the tree to compile netfilter as modules..
When I saw revoke() work in progess, I did react like you : This is coming
from hell...
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-28 6:02 ` Eric Dumazet
@ 2007-03-28 6:35 ` David Miller
0 siblings, 0 replies; 19+ messages in thread
From: David Miller @ 2007-03-28 6:35 UTC (permalink / raw)
To: dada1; +Cc: mhuth, dagriego, davem, netdev
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Wed, 28 Mar 2007 08:02:21 +0200
> This is what I thought too at the begining.
>
> But after some thinking I recalled having to reboot machines just because
> netfilter was not in (because of noticeable performance hit), and I could find
> the tree to compile netfilter as modules..
>
> When I saw revoke() work in progess, I did react like you : This is coming
> from hell...
Another option, similar in vain to what Herbert suggested (the gdb
hack) is to provide a way for root to open file descriptors of other
processes and handle it like that.
Then you could just set the linger socket option properly and close it
up.
Unfortunately we prevent openning of /proc/${PID}/fd/${FD} objects
when they are sockets for well documented reasons.
Using an ioctl with a socket demux key is just beyond disgusting, it's
as simple as that. We have abstractions already for these objects so
let's use them.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 23:09 ` Mark Huth
2007-03-27 23:36 ` David Miller
@ 2007-03-28 0:27 ` John Heffner
2007-03-28 0:34 ` John Heffner
2007-03-28 1:52 ` David Miller
1 sibling, 2 replies; 19+ messages in thread
From: John Heffner @ 2007-03-28 0:27 UTC (permalink / raw)
To: Mark Huth; +Cc: David Miller, dagriego, davem, netdev
Mark Huth wrote:
>
>
> David Miller wrote:
>> From: dagriego@gmail.com (David Griego)
>> Date: Tue, 27 Mar 2007 14:47:54 -0700
>>
>>
>>> Adds an IOCTL for aborting established TCP connections, and is
>>> designed to be an HA performance improvement for cleaning up, failure
>>> notification, and application termination.
>>>
>>> Signed-off-by: David Griego <dagriego@gmail.com>
>>>
>>
>> SO_LINGER with a zero linger time plus close() isn't working
>> properly?
>>
>> There is no reason for this ioctl at all. Either existing
>> facilities provide what you need or what you want is a
>> protocol violation we can't do.
>>
> Actually, there are legitimate uses for this sort of API. The patch
> allows an administrator to kill specific connections that are in use by
> other applications, where the close is not available, since the socket
> is owned by another process. Say one of your large applications has
> hundreds or even thousands of open connections and you have determined
> that a particular connection is causing trouble. This API allows the
> admin to kill that particular connection, and doesn't appear to violate
> any RFC offhand, since an abort is sent to the peer.
>
> One may argue that the applications should be modified, but that is not
> always possible in the case of various ISVs. As Linux gains market
> share in the large server market, more and more applications are being
> ported from other platforms that have this sort of
> management/administrative interfaces.
>
> Mark Huth
I also believe this is a useful thing to have. I'm not 100% sure this
ioctl is the way to go, but it seems reasonable. This directly
corresponds to writing deleteTcb to the tcpConnectionState variable in
the TCP MIB (RFC 4022). I don't think it constitutes a protocol violation.
As a concrete example of a way I've used this type of feature is to
defend against a netkill [1] style attack, where the defense involves
making decisions about which connections to kill when memory gets
scarce. It makes sense to do this with a system daemon, since an admin
might have an arbitrarily complicated policy as to which applications
and peers have priority for the memory. This is too complicated to
distribute and enforce across all applications. You could do this in
the kernel, but why if you don't have to?
-John
[1] http://shlang.com/netkill/
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-28 0:27 ` John Heffner
@ 2007-03-28 0:34 ` John Heffner
2007-03-28 3:09 ` Herbert Xu
2007-03-28 1:52 ` David Miller
1 sibling, 1 reply; 19+ messages in thread
From: John Heffner @ 2007-03-28 0:34 UTC (permalink / raw)
To: John Heffner; +Cc: Mark Huth, David Miller, dagriego, netdev
John Heffner wrote:
> I also believe this is a useful thing to have. I'm not 100% sure this
> ioctl is the way to go, but it seems reasonable. This directly
> corresponds to writing deleteTcb to the tcpConnectionState variable in
> the TCP MIB (RFC 4022). I don't think it constitutes a protocol violation.
Responding to myself in good form :P I'll add that there are other ways
to do this currently but all I know of are hackish, f.e. using a raw
socket to send RST packets to yourself.
-John
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-28 0:27 ` John Heffner
2007-03-28 0:34 ` John Heffner
@ 2007-03-28 1:52 ` David Miller
1 sibling, 0 replies; 19+ messages in thread
From: David Miller @ 2007-03-28 1:52 UTC (permalink / raw)
To: jheffner; +Cc: mhuth, dagriego, davem, netdev
From: John Heffner <jheffner@psc.edu>
Date: Tue, 27 Mar 2007 20:27:44 -0400
> As a concrete example of a way I've used this type of feature is to
> defend against a netkill [1] style attack, where the defense involves
> making decisions about which connections to kill when memory gets
> scarce. It makes sense to do this with a system daemon, since an admin
> might have an arbitrarily complicated policy as to which applications
> and peers have priority for the memory. This is too complicated to
> distribute and enforce across all applications. You could do this in
> the kernel, but why if you don't have to?
On the contrary this sounds like an excellent task for
a netfilter based solution.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 22:30 ` David Miller
2007-03-27 23:09 ` Mark Huth
@ 2007-03-28 0:04 ` Rick Jones
2007-03-29 14:56 ` Predrag Hodoba
2 siblings, 0 replies; 19+ messages in thread
From: Rick Jones @ 2007-03-28 0:04 UTC (permalink / raw)
To: David Miller; +Cc: dagriego, netdev
> There is no reason for this ioctl at all. Either existing
> facilities provide what you need or what you want is a
> protocol violation we can't do.
I agree that 99 times out of ten such a mechanism serves only as a
massive KLUDGE to paper-over application bugs. I'll also sadly
point-out that such a mechanism exists in HP-UX 11.X and I suspect
Solaris !-( I've spent probably the last decade or so attempting to
discourage its use in the HP-UX space, but like some daemon from hell it
just refuses to die.
rick jones
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-27 22:30 ` David Miller
2007-03-27 23:09 ` Mark Huth
2007-03-28 0:04 ` Rick Jones
@ 2007-03-29 14:56 ` Predrag Hodoba
2007-03-29 18:41 ` David Miller
2 siblings, 1 reply; 19+ messages in thread
From: Predrag Hodoba @ 2007-03-29 14:56 UTC (permalink / raw)
To: David Miller; +Cc: David Griego, netdev
David Miller wrote:
> From: dagriego@gmail.com (David Griego)
> Date: Tue, 27 Mar 2007 14:47:54 -0700
>
>> Adds an IOCTL for aborting established TCP connections, and is
>> designed to be an HA performance improvement for cleaning up, failure
>> notification, and application termination.
>>
>> Signed-off-by: David Griego <dagriego@gmail.com>
>
> SO_LINGER with a zero linger time plus close() isn't working
> properly?
>
> There is no reason for this ioctl at all. Either existing
> facilities provide what you need or what you want is a
> protocol violation we can't do.
>
An ioctl like that is sorely missed for high-availability clustering on
Linux and things like the Carrier Grade Linux. Other Unices do have
something similar - there is TCP_IOC_ABORT_CONN ioctl on Solaris and on
FreeBSD/OpenBSD there is TCPCTL_DROP ioctl (accompanied by tcpdrop
utility).
Need for such an API is to a degree indicated in the Carrier Grade Linux
requirements by The Linux Foundation (former OSDL). CGL Clustering
requirements document addresses the need to deal with open TCP sessions in
events of node failure. For example, Carrier Grade Linux Clustering
Requirements Definition (at
http://groups.osdl.org/apps/group_public/download.php/1981/cgl-cluster.pdf)
has two such requirements on its roadmap list:
CAF.2.3 Deliberate TCP Session Takeover
CAF.2.4 TCP Session Takeover on Node Failure
(There they talk about session takeover, which is in my opinion quite
difficult to implement, and before such a thing is achieved there is a
clear need for ioctl similar to the one proposed - to tell all clients with
TCP sessions towards the IP address of a failed node that their sessions
are broken, and that they should handle that instantly, not wait for
timeouts to expire. And a client might be a general 3rd party software
where one does not set socket options oneself.)
I'll give a possible use case - let's say we have a highly-available cluster
in active/passive configuration. The active node runs some services and
listens on cluster's IP address. Clients use the cluster's services by
establishing TCP sessions to that IP address. When the active node fails,
the other node should as fast as possible take over the same IP address and
offer the same services, with clients not noticing anything or noticing as
little as possible. Ideally, TCP sessions should be taken over (as
indicated in the Carrier Grade Linux requirements), enabling clients to
continue transparently. But, TCP session takeover is a rather tough
requirement, and before something like that exists (if ever) it would be
quite useful with such ioctl to enable clients to discover broken
sessions and recover in matter of seconds instead of minutes. Obviously, if
one envisages failover of any kind of service, then you can't control what
socket options are used by someone.
Cheers,
Predrag Hodoba
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-29 14:56 ` Predrag Hodoba
@ 2007-03-29 18:41 ` David Miller
2007-03-30 1:09 ` Stephen Hemminger
0 siblings, 1 reply; 19+ messages in thread
From: David Miller @ 2007-03-29 18:41 UTC (permalink / raw)
To: predrag.hodoba; +Cc: dagriego, netdev
From: "Predrag Hodoba" <predrag.hodoba@gmail.com>
Date: Thu, 29 Mar 2007 16:56:22 +0200
> Need for such an API is to a degree indicated in the Carrier Grade Linux
> requirements by The Linux Foundation (former OSDL).
Something being in the CGL specification is to me exactly a great
reason NOT to add it. That specification is so full of garbage it's
unbelievable.
Thanks, you've given me one more reason not to even remotely consider
adding this feature.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-29 18:41 ` David Miller
@ 2007-03-30 1:09 ` Stephen Hemminger
2007-03-30 15:10 ` Predrag Hodoba
0 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2007-03-30 1:09 UTC (permalink / raw)
To: David Miller; +Cc: predrag.hodoba, dagriego, netdev
David Miller wrote:
> From: "Predrag Hodoba" <predrag.hodoba@gmail.com>
> Date: Thu, 29 Mar 2007 16:56:22 +0200
>
>
>> Need for such an API is to a degree indicated in the Carrier Grade Linux
>> requirements by The Linux Foundation (former OSDL).
>>
>
> Something being in the CGL specification is to me exactly a great
> reason NOT to add it. That specification is so full of garbage it's
> unbelievable.
>
> Thanks, you've given me one more reason not to even remotely consider
> adding this feature.
>
Agreed, CGL is a vendor driven group that has always wanted to replicate
proprietary misfeatures onto Linux. There is a real requirement to
provide high availability but there should be no requirement to implement
the solution in the same crap way as legacy Unix.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-30 1:09 ` Stephen Hemminger
@ 2007-03-30 15:10 ` Predrag Hodoba
2007-03-30 18:33 ` Stephen Hemminger
0 siblings, 1 reply; 19+ messages in thread
From: Predrag Hodoba @ 2007-03-30 15:10 UTC (permalink / raw)
To: Stephen Hemminger, David Miller; +Cc: dagriego, netdev
On 30/03/07, Stephen Hemminger <shemminger@osdl.org> wrote:
> David Miller wrote:
> >
> > Something being in the CGL specification is to me exactly a great
> > reason NOT to add it. That specification is so full of garbage it's
> > unbelievable.
> >
> > Thanks, you've given me one more reason not to even remotely consider
> > adding this feature.
> >
> Agreed, CGL is a vendor driven group that has always wanted to replicate
> proprietary misfeatures onto Linux. There is a real requirement to
> provide high availability but there should be no requirement to implement
> the solution in the same crap way as legacy Unix.
OK, let's put aside CGL and legacy Unices.
Still, I don't see how the case I mentioned can easily be handled.
(The case being - effective clean up of all affected client TCP
connections, following failover of the server IP address from active
to passive node in a highly available cluster).
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-30 15:10 ` Predrag Hodoba
@ 2007-03-30 18:33 ` Stephen Hemminger
2007-03-30 19:09 ` Predrag Hodoba
0 siblings, 1 reply; 19+ messages in thread
From: Stephen Hemminger @ 2007-03-30 18:33 UTC (permalink / raw)
To: Predrag Hodoba; +Cc: David Miller, dagriego, netdev
Predrag Hodoba wrote:
> On 30/03/07, Stephen Hemminger <shemminger@osdl.org> wrote:
>> David Miller wrote:
>> >
>> > Something being in the CGL specification is to me exactly a great
>> > reason NOT to add it. That specification is so full of garbage it's
>> > unbelievable.
>> >
>> > Thanks, you've given me one more reason not to even remotely consider
>> > adding this feature.
>> >
>> Agreed, CGL is a vendor driven group that has always wanted to replicate
>> proprietary misfeatures onto Linux. There is a real requirement to
>> provide high availability but there should be no requirement to
>> implement
>> the solution in the same crap way as legacy Unix.
>
> OK, let's put aside CGL and legacy Unices.
>
> Still, I don't see how the case I mentioned can easily be handled.
> (The case being - effective clean up of all affected client TCP
> connections, following failover of the server IP address from active
> to passive node in a highly available cluster).
Why clean them up? The client connections will timeout and they can
reconnect. Actively killing them early does nothing helpful. Just like
the CGL requirement for forced unmount, the forced operation introduces
a whole bunch of race conditions and shared file descriptor problems.
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-30 18:33 ` Stephen Hemminger
@ 2007-03-30 19:09 ` Predrag Hodoba
2007-03-30 20:46 ` Rick Jones
0 siblings, 1 reply; 19+ messages in thread
From: Predrag Hodoba @ 2007-03-30 19:09 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: David Miller, dagriego, netdev
On 30/03/07, Stephen Hemminger <shemminger@linux-foundation.org> wrote:
> Predrag Hodoba wrote:
> > On 30/03/07, Stephen Hemminger <shemminger@osdl.org> wrote:
> >> David Miller wrote:
> >> >
> >> > Something being in the CGL specification is to me exactly a great
> >> > reason NOT to add it. That specification is so full of garbage it's
> >> > unbelievable.
> >> >
> >> > Thanks, you've given me one more reason not to even remotely consider
> >> > adding this feature.
> >> >
> >> Agreed, CGL is a vendor driven group that has always wanted to replicate
> >> proprietary misfeatures onto Linux. There is a real requirement to
> >> provide high availability but there should be no requirement to
> >> implement
> >> the solution in the same crap way as legacy Unix.
> >
> > OK, let's put aside CGL and legacy Unices.
> >
> > Still, I don't see how the case I mentioned can easily be handled.
> > (The case being - effective clean up of all affected client TCP
> > connections, following failover of the server IP address from active
> > to passive node in a highly available cluster).
>
> Why clean them up? The client connections will timeout and they can
> reconnect. Actively killing them early does nothing helpful. Just like
> the CGL requirement for forced unmount, the forced operation introduces
> a whole bunch of race conditions and shared file descriptor problems.
Well, it depends on how fast you have to react on failure. For
data-center grade high-availability it is, as you said, enough to wait
for clients to timeout. For telco (or similar, more demanding) grade
of high-availability, timeout just takes too long. You typically have
to discover failure using some kind of heartbeat mechanism and clean
up immediately ...
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-30 19:09 ` Predrag Hodoba
@ 2007-03-30 20:46 ` Rick Jones
2007-03-31 6:25 ` Predrag Hodoba
0 siblings, 1 reply; 19+ messages in thread
From: Rick Jones @ 2007-03-30 20:46 UTC (permalink / raw)
To: Predrag Hodoba; +Cc: Stephen Hemminger, David Miller, dagriego, netdev
If the switchover from active to standby is "commanded" then there is
the opportunity to "tell" the applications on the server to close their
connections - either explicitly with some sort of defined interface, or
implicitly by killing the processes. Then the IP can be brought-up on
the standby and processes started/enabled/whatever and the clients can
establish their new connections. The ioctl here (at least if it is like
the tcp_discon options in HP-UX/Solaris) wouldn't be any better than
just killing the process in so far as what happens on the network - in
fact, it could be worse since the RST will not be retransmitted if lost,
but FINs would. So, the ioctl could still leave clients twisting in the
ether waiting for their application-level heartbeats to kick-in anyway.
Heck, depending on their heartbeat lengths, even the FIN stuff if lost
could leave them depending on their heartbeats.
If the switchover from active to standby is "uncommanded" it probably
means the primary went belly-up which means you don't have the
opportunity to make an ioctl call anyway, and you are back to the
heartbeats.
rick jones
^ permalink raw reply [flat|nested] 19+ messages in thread
* Re: [PATCH] NET: Add TCP connection abort IOCTL
2007-03-30 20:46 ` Rick Jones
@ 2007-03-31 6:25 ` Predrag Hodoba
0 siblings, 0 replies; 19+ messages in thread
From: Predrag Hodoba @ 2007-03-31 6:25 UTC (permalink / raw)
To: Rick Jones; +Cc: Stephen Hemminger, David Miller, dagriego, netdev
On 30/03/07, Rick Jones <rick.jones2@hp.com> wrote:
> If the switchover from active to standby is "commanded" then there is
> the opportunity to "tell" the applications on the server to close their
> connections - either explicitly with some sort of defined interface, or
> implicitly by killing the processes. Then the IP can be brought-up on
> the standby and processes started/enabled/whatever and the clients can
> establish their new connections. The ioctl here (at least if it is like
> the tcp_discon options in HP-UX/Solaris) wouldn't be any better than
> just killing the process in so far as what happens on the network - in
> fact, it could be worse since the RST will not be retransmitted if lost,
> but FINs would. So, the ioctl could still leave clients twisting in the
> ether waiting for their application-level heartbeats to kick-in anyway.
> Heck, depending on their heartbeat lengths, even the FIN stuff if lost
> could leave them depending on their heartbeats.
>
> If the switchover from active to standby is "uncommanded" it probably
> means the primary went belly-up which means you don't have the
> opportunity to make an ioctl call anyway, and you are back to the
> heartbeats.
>
> rick jones
What I meant is - it could be used on ***client***. Because clients
are left stranded with invalid connections when a primary fails (your
"uncommanded" switchover scenario). If you wait for them to timeout,
that will indeed happen, but it takes time and you are not back online
as fast as you would like. If cluster's services running on a client
already know about the failover (by means of "heartbeat" and observing
change in cluster membership), then they can propagate that knowledge
to all processes uneccessarily blocked in their socket calls towards
the failed IP address. If these connections are forcibly disconnected,
the respective sockets' calls would return with error code and their
processes can reconnect in few seconds after the failure and continue
to do what they are meant to do.
predrag
^ permalink raw reply [flat|nested] 19+ messages in thread
end of thread, other threads:[~2007-03-31 6:25 UTC | newest]
Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-03-27 21:47 [PATCH] NET: Add TCP connection abort IOCTL David Griego
2007-03-27 22:30 ` David Miller
2007-03-27 23:09 ` Mark Huth
2007-03-27 23:36 ` David Miller
2007-03-28 6:02 ` Eric Dumazet
2007-03-28 6:35 ` David Miller
2007-03-28 0:27 ` John Heffner
2007-03-28 0:34 ` John Heffner
2007-03-28 3:09 ` Herbert Xu
2007-03-28 1:52 ` David Miller
2007-03-28 0:04 ` Rick Jones
2007-03-29 14:56 ` Predrag Hodoba
2007-03-29 18:41 ` David Miller
2007-03-30 1:09 ` Stephen Hemminger
2007-03-30 15:10 ` Predrag Hodoba
2007-03-30 18:33 ` Stephen Hemminger
2007-03-30 19:09 ` Predrag Hodoba
2007-03-30 20:46 ` Rick Jones
2007-03-31 6:25 ` Predrag Hodoba
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).