From: Simon Horman <horms@verge.net.au>
To: netdev@vger.kernel.org, lvs-devel@vger.kernel.org
Cc: "Malcolm Turnbull" <malcolm@loadbalancer.org>,
"Siim Põder" <siim@p6drad-teel.net>,
"Julius Volz" <juliusv@google.com>,
"Vince Busam" <vbusam@google.com>
Subject: [PATCH 1/2] ipvs: load balance IPv4 connections from a local process
Date: Fri, 5 Sep 2008 11:36:11 +1000 [thread overview]
Message-ID: <20080905013609.GD14128@verge.net.au> (raw)
From: Malcolm Turnbull <malcolm@loadbalancer.org>
ipvs: load balance IPv4 connections from a local process
This allows IPVS to load balance connections made by a local process.
For example a proxy server running locally.
External client --> pound:443 -> Local:443 --> IPVS:80 --> RealServer
Signed-off-by: Siim Põder <siim@p6drad-teel.net>
Signed-off-by: Malcolm Turnbull <malcolm@loadbalancer.org>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/ipv4/ipvs/ip_vs_core.c | 224 ++++++++++++++++++++++-----------------
net/ipv4/ipvs/ip_vs_proto_tcp.c | 4
2 files changed, 134 insertions(+), 94 deletions(-)
* Simon Horman, Wed, 03 Sep 2008 14:50:36 +1000
I have updated this patch so that it will apply on top
of the current IPv6 patches.
http://marc.info/?l=linux-netdev&m=122036407428246&w=2
I have also updated the patch so that it does not handle IPv6 packets.
I have an additional patch that I will provide to extend
the code to handle IPv6 connections.
* Simon Horman, Fri, 05 Sep 2008 11:32:38 +1000
I have applied this patch to the net-next-2.6 branch of lvs-2.6
git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-2.6.git
Index: lvs-2.6/net/ipv4/ipvs/ip_vs_core.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_core.c 2008-09-03 11:01:38.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_core.c 2008-09-03 12:17:49.000000000 +1000
@@ -651,12 +651,53 @@ void ip_vs_nat_icmp_v6(struct sk_buff *s
}
#endif
+/* Handle relevant response ICMP messages - forward to the right
+ * destination host. Used for NAT and local client.
+ */
+static int handle_response_icmp(struct sk_buff *skb, struct iphdr *iph,
+ struct iphdr *cih, struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp,
+ unsigned int offset, unsigned int ihl)
+{
+ unsigned int verdict = NF_DROP;
+
+ if (IP_VS_FWD_METHOD(cp) != 0) {
+ IP_VS_ERR("shouldn't reach here, because the box is on the "
+ "half connection in the tun/dr module.\n");
+ }
+
+ /* Ensure the checksum is correct */
+ if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
+ /* Failed checksum! */
+ IP_VS_DBG(1,
+ "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
+ NIPQUAD(iph->saddr));
+ goto out;
+ }
+
+ if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
+ offset += 2 * sizeof(__u16);
+ if (!skb_make_writable(skb, offset))
+ goto out;
+
+ ip_vs_nat_icmp(skb, pp, cp, 1);
+
+ /* do the statistics and put it back */
+ ip_vs_out_stats(cp, skb);
+
+ skb->ipvs_property = 1;
+ verdict = NF_ACCEPT;
+
+out:
+ __ip_vs_conn_put(cp);
+
+ return verdict;
+}
+
/*
* Handle ICMP messages in the inside-to-outside direction (outgoing).
- * Find any that might be relevant, check against existing connections,
- * forward to the right destination host if relevant.
+ * Find any that might be relevant, check against existing connections.
* Currently handles error types - unreachable, quench, ttl exceeded.
- * (Only used in VS/NAT)
*/
static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
{
@@ -666,7 +707,7 @@ static int ip_vs_out_icmp(struct sk_buff
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
- unsigned int offset, ihl, verdict;
+ unsigned int offset, ihl;
*related = 1;
@@ -725,38 +766,7 @@ static int ip_vs_out_icmp(struct sk_buff
if (!cp)
return NF_ACCEPT;
- verdict = NF_DROP;
-
- if (IP_VS_FWD_METHOD(cp) != 0) {
- IP_VS_ERR("shouldn't reach here, because the box is on the "
- "half connection in the tun/dr module.\n");
- }
-
- /* Ensure the checksum is correct */
- if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
- /* Failed checksum! */
- IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
- NIPQUAD(iph->saddr));
- goto out;
- }
-
- if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
- offset += 2 * sizeof(__u16);
- if (!skb_make_writable(skb, offset))
- goto out;
-
- ip_vs_nat_icmp(skb, pp, cp, 1);
-
- /* do the statistics and put it back */
- ip_vs_out_stats(cp, skb);
-
- skb->ipvs_property = 1;
- verdict = NF_ACCEPT;
-
- out:
- __ip_vs_conn_put(cp);
-
- return verdict;
+ return handle_response_icmp(skb, iph, cih, cp, pp, offset, ihl);
}
#ifdef CONFIG_IP_VS_IPV6
@@ -875,10 +885,76 @@ static inline int is_tcp_reset(const str
return th->rst;
}
+/* Handle response packets: rewrite addresses and send away...
+ * Used for NAT and local client.
+ */
+static unsigned int
+handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ struct ip_vs_conn *cp, int ihl)
+{
+ IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
+
+ if (!skb_make_writable(skb, ihl))
+ goto drop;
+
+ /* mangle the packet */
+ if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
+ goto drop;
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ipv6_hdr(skb)->saddr = cp->vaddr.in6;
+ else
+#endif
+ {
+ ip_hdr(skb)->saddr = cp->vaddr.ip;
+ ip_send_check(ip_hdr(skb));
+ }
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6) {
+ if (ip6_route_me_harder(skb) != 0)
+ goto drop;
+ } else
+#endif
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto drop;
+
+ /* For policy routing, packets originating from this
+ * machine itself may be routed differently to packets
+ * passing through. We want this packet to be routed as
+ * if it came from this machine itself. So re-compute
+ * the routing information.
+ */
+ if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
+ goto drop;
+
+ IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
+
+ ip_vs_out_stats(cp, skb);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_conn_put(cp);
+
+ skb->ipvs_property = 1;
+
+ LeaveFunction(11);
+ return NF_ACCEPT;
+
+drop:
+ ip_vs_conn_put(cp);
+ kfree_skb(skb);
+ return NF_STOLEN;
+}
+
/*
* It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- * Check if outgoing packet belongs to the established ip_vs_conn,
- * rewrite addresses of the packet and send it on its way...
+ * Check if outgoing packet belongs to the established ip_vs_conn.
*/
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
@@ -987,55 +1063,7 @@ ip_vs_out(unsigned int hooknum, struct s
return NF_ACCEPT;
}
- IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
-
- if (!skb_make_writable(skb, iph.len))
- goto drop;
-
- /* mangle the packet */
- if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
- goto drop;
-
-#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6)
- ipv6_hdr(skb)->saddr = cp->vaddr.in6;
- else
-#endif
- {
- ip_hdr(skb)->saddr = cp->vaddr.ip;
- ip_send_check(ip_hdr(skb));
- }
-
- /* For policy routing, packets originating from this
- * machine itself may be routed differently to packets
- * passing through. We want this packet to be routed as
- * if it came from this machine itself. So re-compute
- * the routing information.
- */
-#ifdef CONFIG_IP_VS_IPV6
- if (af == AF_INET6) {
- if (ip6_route_me_harder(skb) != 0)
- goto drop;
- } else
-#endif
- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto drop;
-
- IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
-
- ip_vs_out_stats(cp, skb);
- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
- ip_vs_conn_put(cp);
-
- skb->ipvs_property = 1;
-
- LeaveFunction(11);
- return NF_ACCEPT;
-
- drop:
- ip_vs_conn_put(cp);
- kfree_skb(skb);
- return NF_STOLEN;
+ return handle_response(af, skb, pp, cp, iph.len);
}
@@ -1111,8 +1139,14 @@ ip_vs_in_icmp(struct sk_buff *skb, int *
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
- if (!cp)
+ if (!cp) {
+ /* The packet could also belong to a local client */
+ cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ if (cp)
+ return handle_response_icmp(skb, iph, cih, cp, pp,
+ offset, ihl);
return NF_ACCEPT;
+ }
verdict = NF_DROP;
@@ -1244,11 +1278,12 @@ ip_vs_in(unsigned int hooknum, struct sk
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
/*
- * Big tappo: only PACKET_HOST (neither loopback nor mcasts)
- * ... don't know why 1st test DOES NOT include 2nd (?)
+ * Big tappo: only PACKET_HOST, including loopback for local client
+ * Don't handle local packets on IPv6 for now
*/
- if (unlikely(skb->pkt_type != PACKET_HOST
- || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
+ if (unlikely(skb->pkt_type != PACKET_HOST ||
+ (af == AF_INET6 || (skb->dev->flags & IFF_LOOPBACK ||
+ skb->sk)))) {
IP_VS_DBG_BUF(12, "packet type=%d proto=%d daddr=%s ignored\n",
skb->pkt_type,
iph.protocol,
@@ -1277,6 +1312,11 @@ ip_vs_in(unsigned int hooknum, struct sk
if (unlikely(!cp)) {
int v;
+ /* For local client packets, it could be a response */
+ cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ if (cp)
+ return handle_response(af, skb, pp, cp, iph.len);
+
if (!pp->conn_schedule(af, skb, pp, &v, &cp))
return v;
}
Index: lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c
===================================================================
--- lvs-2.6.orig/net/ipv4/ipvs/ip_vs_proto_tcp.c 2008-09-03 10:56:05.000000000 +1000
+++ lvs-2.6/net/ipv4/ipvs/ip_vs_proto_tcp.c 2008-09-03 11:24:26.000000000 +1000
@@ -166,7 +166,7 @@ tcp_snat_handler(struct sk_buff *skb,
tcph->source = cp->vport;
/* Adjust TCP checksums */
- if (!cp->app) {
+ if (!cp->app && (tcph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr,
cp->dport, cp->vport);
@@ -235,7 +235,7 @@ tcp_dnat_handler(struct sk_buff *skb,
/*
* Adjust TCP checksums
*/
- if (!cp->app) {
+ if (!cp->app && (tcph->check != 0)) {
/* Only port and addr are changed, do fast csum update */
tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr,
cp->vport, cp->dport);
next reply other threads:[~2008-09-05 1:36 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-09-05 1:36 Simon Horman [this message]
2008-09-05 1:37 ` [PATCH 2/2] ipvs: load balance ipv6 connections from a local process Simon Horman
2008-09-05 11:40 ` Julius Volz
2008-09-05 15:55 ` Brian Haley
2008-09-05 16:37 ` Julius Volz
2008-09-06 4:14 ` Simon Horman
2008-09-06 9:26 ` Julius Volz
2008-09-08 0:30 ` Simon Horman
2008-09-08 1:48 ` Simon Horman
2008-09-08 9:30 ` Julius Volz
2008-09-08 9:50 ` Simon Horman
2008-09-05 5:12 ` [PATCH 1/2] ipvs: load balance IPv4 " Julian Anastasov
2008-09-05 5:49 ` Siim Põder
2008-09-05 5:49 ` Siim Põder
2008-09-06 7:43 ` Simon Horman
2008-09-05 11:02 ` Julius Volz
2008-09-06 3:56 ` Simon Horman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080905013609.GD14128@verge.net.au \
--to=horms@verge.net.au \
--cc=juliusv@google.com \
--cc=lvs-devel@vger.kernel.org \
--cc=malcolm@loadbalancer.org \
--cc=netdev@vger.kernel.org \
--cc=siim@p6drad-teel.net \
--cc=vbusam@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.