* [PATCH 1/8] IPVS: Backup, Prepare for transferring firewall marks (fwmark) to the backup daemon.
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 2/8] IPVS: Split ports[2] into src_port and dst_port Simon Horman
` (7 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
One struct will have fwmark added:
* ip_vs_conn
ip_vs_conn_new() and ip_vs_find_dest()
will have an extra param - fwmark
The effects of that, is in this patch.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 6 ++++--
net/netfilter/ipvs/ip_vs_conn.c | 5 +++--
net/netfilter/ipvs/ip_vs_core.c | 8 ++++----
net/netfilter/ipvs/ip_vs_ctl.c | 4 ++--
net/netfilter/ipvs/ip_vs_ftp.c | 5 +++--
net/netfilter/ipvs/ip_vs_sync.c | 4 ++--
6 files changed, 18 insertions(+), 14 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index d5a32e4..890f01c 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -382,6 +382,7 @@ struct ip_vs_conn {
union nf_inet_addr vaddr; /* virtual address */
union nf_inet_addr daddr; /* destination address */
volatile __u32 flags; /* status flags */
+ __u32 fwmark; /* Fire wall mark from skb */
__be16 cport;
__be16 vport;
__be16 dport;
@@ -720,7 +721,7 @@ extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport);
struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr,
__be16 dport, unsigned flags,
- struct ip_vs_dest *dest);
+ struct ip_vs_dest *dest, __u32 fwmark);
extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
extern const char * ip_vs_state_name(__u16 proto, int state);
@@ -901,7 +902,8 @@ extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
- const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol);
+ const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol,
+ __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 7615f9e..66e4662 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -613,7 +613,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
if ((cp) && (!cp->dest)) {
dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
- cp->protocol);
+ cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
} else
@@ -803,7 +803,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp)
struct ip_vs_conn *
ip_vs_conn_new(const struct ip_vs_conn_param *p,
const union nf_inet_addr *daddr, __be16 dport, unsigned flags,
- struct ip_vs_dest *dest)
+ struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
@@ -827,6 +827,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
&cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
+ cp->fwmark = fwmark;
if (flags & IP_VS_CONN_F_TEMPLATE && p->pe) {
ip_vs_pe_get(p->pe);
cp->pe = p->pe;
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index b4e51e9..e2bb3cd 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -293,7 +293,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* and thus param.pe_data will be destroyed
* when the template expires */
ct = ip_vs_conn_new(¶m, &dest->addr, dport,
- IP_VS_CONN_F_TEMPLATE, dest);
+ IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
return NULL;
@@ -319,7 +319,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
&iph.daddr, ports[1], ¶m);
- cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest);
+ cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
return NULL;
@@ -423,7 +423,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
pptr[0], &iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
- flags, dest);
+ flags, dest, skb->mark);
if (!cp)
return NULL;
}
@@ -489,7 +489,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
IP_VS_CONN_F_BYPASS | flags,
- NULL);
+ NULL, skb->mark);
if (!cp)
return NF_DROP;
}
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 3e92558..a5bd002 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -657,12 +657,12 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
- __be16 vport, __u16 protocol)
+ __be16 vport, __u16 protocol, __u32 fwmark)
{
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
+ svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 7545500..84aef65 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -208,7 +208,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
IP_VS_CONN_F_NFCT,
- cp->dest);
+ cp->dest, skb->mark);
if (!n_cp)
return 0;
@@ -365,7 +365,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
htons(ntohs(cp->dport)-1),
- IP_VS_CONN_F_NFCT, cp->dest);
+ IP_VS_CONN_F_NFCT, cp->dest,
+ skb->mark);
if (!n_cp)
return 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3897d6b..47eed67 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -404,7 +404,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
s->dport,
(union nf_inet_addr *)&s->vaddr,
s->vport,
- s->protocol);
+ s->protocol, 0);
/* Set the approprite ativity flag */
if (s->protocol == IPPROTO_TCP) {
if (state != IP_VS_TCP_S_ESTABLISHED)
@@ -419,7 +419,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
}
cp = ip_vs_conn_new(¶m,
(union nf_inet_addr *)&s->daddr,
- s->dport, flags, dest);
+ s->dport, flags, dest, 0);
if (dest)
atomic_dec(&dest->refcnt);
if (!cp) {
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/8] IPVS: Split ports[2] into src_port and dst_port
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
2010-11-25 1:46 ` [PATCH 1/8] IPVS: Backup, Prepare for transferring firewall marks (fwmark) to the backup daemon Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 3/8] IPVS: skb defrag in L7 helpers Simon Horman
` (6 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Avoid sending invalid pointer due to skb_linearize() call.
This patch prepares for next patch where skb_linearize is a part.
In ip_vs_sched_persist() params the ports ptr will be replaced by
src and dst port.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 21 +++++++++++----------
1 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e2bb3cd..9acdd79 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -200,7 +200,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
- __be16 ports[2])
+ __be16 src_port, __be16 dst_port)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -224,8 +224,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -247,14 +247,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
__be16 vport = 0;
- if (ports[1] == svc->port) {
+ if (dst_port == svc->port) {
/* non-FTP template:
* <protocol, caddr, 0, vaddr, vport, daddr, dport>
* FTP template:
* <protocol, caddr, 0, vaddr, 0, daddr, 0>
*/
if (svc->port != FTPPORT)
- vport = ports[1];
+ vport = dst_port;
} else {
/* Note: persistent fwmark-based services and
* persistent port zero service are handled here.
@@ -285,7 +285,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
return NULL;
}
- if (ports[1] == svc->port && svc->port != FTPPORT)
+ if (dst_port == svc->port && svc->port != FTPPORT)
dport = dest->port;
/* Create a template
@@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
kfree(param.pe_data);
}
- dport = ports[1];
+ dport = dst_port;
if (dport == svc->port && dest->port)
dport = dest->port;
@@ -317,8 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
- &iph.daddr, ports[1], ¶m);
+ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
+ &iph.daddr, dst_port, ¶m);
+
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
@@ -388,7 +389,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
*ignored = 0;
- return ip_vs_sched_persist(svc, skb, pptr);
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1]);
}
/*
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/8] IPVS: skb defrag in L7 helpers
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
2010-11-25 1:46 ` [PATCH 1/8] IPVS: Backup, Prepare for transferring firewall marks (fwmark) to the backup daemon Simon Horman
2010-11-25 1:46 ` [PATCH 2/8] IPVS: Split ports[2] into src_port and dst_port Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 4/8] IPVS: Handle Scheduling errors Simon Horman
` (5 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
L7 helpers like sip needs skb defrag
since L7 data can be fragmented.
This patch requires "IPVS Break ports-2 into src_port and dst_port" patch
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_pe_sip.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e96..0d83bc0 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
+ int retc;
ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
if (dataoff >= skb->len)
return -EINVAL;
+ if ((retc=skb_linearize(skb)) < 0)
+ return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/8] IPVS: Handle Scheduling errors.
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (2 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 3/8] IPVS: skb defrag in L7 helpers Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 5/8] IPVS: Backup, Adding structs for new sync format Simon Horman
` (4 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
If ip_vs_conn_fill_param_persist return an error to ip_vs_sched_persist,
this error must propagate as ignored=-1 to ip_vs_schedule().
Errors from ip_vs_conn_new() in ip_vs_sched_persist() and ip_vs_schedule()
should also return *ignored=-1;
This patch just relies on the fact that ignored is 1 before calling
ip_vs_sched_persist().
Sent from Julian:
"The new case when ip_vs_conn_fill_param_persist fails
should set *ignored = -1, so that we can use NF_DROP,
see below. *ignored = -1 should be also used for ip_vs_conn_new
failure in ip_vs_sched_persist() and ip_vs_schedule().
The new negative value should be handled in tcp,udp,sctp"
"To summarize:
- *ignored = 1:
protocol tried to schedule (eg. on SYN), found svc but the
svc/scheduler decides that this packet should be accepted with
NF_ACCEPT because it must not be scheduled.
- *ignored = 0:
scheduler can not find destination, so try bypass or
return ICMP and then NF_DROP (ip_vs_leave).
- *ignored = -1:
scheduler tried to schedule but fatal error occurred, eg.
ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
failure such as missing Call-ID, ENOMEM on skb_linearize
or pe_data. In this case we should return NF_DROP without
any attempts to send ICMP with ip_vs_leave."
More or less all ideas and input to this patch is work from
Julian Anastasov
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 56 +++++++++++++++++++++++---------
net/netfilter/ipvs/ip_vs_proto_sctp.c | 11 +++++--
net/netfilter/ipvs/ip_vs_proto_tcp.c | 10 +++++-
net/netfilter/ipvs/ip_vs_proto_udp.c | 10 +++++-
4 files changed, 64 insertions(+), 23 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 9acdd79..3445da6 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -177,7 +177,7 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction,
return pp->state_transition(cp, direction, skb, pp);
}
-static inline void
+static inline int
ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
struct sk_buff *skb, int protocol,
const union nf_inet_addr *caddr, __be16 cport,
@@ -187,7 +187,9 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
- p->pe->fill_param(p, skb);
+ return p->pe->fill_param(p, skb);
+
+ return 0;
}
/*
@@ -200,7 +202,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
- __be16 src_port, __be16 dst_port)
+ __be16 src_port, __be16 dst_port, int *ignored)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -268,20 +270,27 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
vaddr = &fwmark;
}
}
- ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
- vaddr, vport, ¶m);
+ /* return *ignored = -1 so NF_DROP can be used */
+ if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0,
+ vaddr, vport, ¶m) < 0) {
+ *ignored = -1;
+ return NULL;
+ }
}
/* Check if a template already exists */
ct = ip_vs_ct_in_get(¶m);
if (!ct || !ip_vs_check_template(ct)) {
- /* No template found or the dest of the connection
+ /*
+ * No template found or the dest of the connection
* template is not available.
+ * return *ignored=0 i.e. ICMP and NF_DROP
*/
dest = svc->scheduler->schedule(svc, skb);
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
+ *ignored = 0;
return NULL;
}
@@ -296,6 +305,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
IP_VS_CONN_F_TEMPLATE, dest, skb->mark);
if (ct == NULL) {
kfree(param.pe_data);
+ *ignored = -1;
return NULL;
}
@@ -323,6 +333,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
+ *ignored = -1;
return NULL;
}
@@ -342,6 +353,21 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* It selects a server according to the virtual service, and
* creates a connection entry.
* Protocols supported: TCP, UDP
+ *
+ * Usage of *ignored
+ *
+ * 1 : protocol tried to schedule (eg. on SYN), found svc but the
+ * svc/scheduler decides that this packet should be accepted with
+ * NF_ACCEPT because it must not be scheduled.
+ *
+ * 0 : scheduler can not find destination, so try bypass or
+ * return ICMP and then NF_DROP (ip_vs_leave).
+ *
+ * -1 : scheduler tried to schedule but fatal error occurred, eg.
+ * ip_vs_conn_new failure (ENOMEM) or ip_vs_sip_fill_param
+ * failure such as missing Call-ID, ENOMEM on skb_linearize
+ * or pe_data. In this case we should return NF_DROP without
+ * any attempts to send ICMP with ip_vs_leave.
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
@@ -372,11 +398,9 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
/*
- * Do not schedule replies from local real server. It is risky
- * for fwmark services but mostly for persistent services.
+ * Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) &&
(cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
@@ -387,10 +411,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* Persistent service
*/
- if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
- *ignored = 0;
- return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1]);
- }
+ if (svc->flags & IP_VS_SVC_F_PERSISTENT)
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1], ignored);
+
+ *ignored = 0;
/*
* Non-persistent service
@@ -403,8 +427,6 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
return NULL;
}
- *ignored = 0;
-
dest = svc->scheduler->schedule(svc, skb);
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
@@ -425,8 +447,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
- if (!cp)
+ if (!cp) {
+ *ignored = -1;
return NULL;
+ }
}
IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u "
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 1ea96bc..a315159 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -47,13 +47,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
-
+ /* NF_ACCEPT */
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index f6c5200..1cdab12 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -64,12 +64,18 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 9d106a0..cd398de 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -63,12 +63,18 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* incoming connection, and create a connection entry.
*/
*cpp = ip_vs_schedule(svc, skb, pp, &ignored);
- if (!*cpp && !ignored) {
- *verdict = ip_vs_leave(svc, skb, pp);
+ if (!*cpp && ignored <= 0) {
+ if (!ignored)
+ *verdict = ip_vs_leave(svc, skb, pp);
+ else {
+ ip_vs_service_put(svc);
+ *verdict = NF_DROP;
+ }
return 0;
}
ip_vs_service_put(svc);
}
+ /* NF_ACCEPT */
return 1;
}
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 5/8] IPVS: Backup, Adding structs for new sync format
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (3 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 4/8] IPVS: Handle Scheduling errors Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 6/8] IPVS: Backup, Adding Version 1 receive capability Simon Horman
` (3 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
New structs defined for version 1 of sync.
* ip_vs_sync_v4 Ipv4 base format struct
* ip_vs_sync_v6 Ipv6 base format struct
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_sync.c | 154 ++++++++++++++++++++++++++++++++++++---
1 files changed, 142 insertions(+), 12 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 47eed67..566482f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -43,11 +43,13 @@
#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT 8848 /* multicast port */
+#define SYNC_PROTO_VER 1 /* Protocol version in header */
/*
* IPVS sync connection entry
+ * Version 0, i.e. original version.
*/
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
__u8 reserved;
/* Protocol, addresses and port numbers */
@@ -71,40 +73,157 @@ struct ip_vs_sync_conn_options {
struct ip_vs_seq out_seq; /* outgoing seq. struct */
};
+/*
+ Sync Connection format (sync_conn)
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Protocol | Ver. | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | State | cport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | vport | dport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | fwmark |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | timeout (in sec.) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ... |
+ | IP-Addresses (v4 or v6) |
+ | ... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ Optional Parameters.
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param. Type | Param. Length | Param. data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+ | ... |
+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | Param Type | Param. Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param data |
+ | Last Param data should be padded for 32 bit alignment |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ struct in6_addr caddr; /* client address */
+ struct in6_addr vaddr; /* virtual address */
+ struct in6_addr daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+ struct ip_vs_sync_v4 v4;
+ struct ip_vs_sync_v6 v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6 0
+#define STYPE_F_INET6 (1 << STYPE_INET6)
+
+#define SVER_SHIFT 12 /* Shift to get version */
+#define SVER_MASK 0x0fff /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA 1
+#define IPVS_OPT_PE_DATA 2
+#define IPVS_OPT_PE_NAME 3
+#define IPVS_OPT_PARAM 7
+
+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
+
struct ip_vs_sync_thread_data {
struct socket *sock;
char *buf;
};
-#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
/*
- The master mulitcasts messages to the backup load balancers in the
- following format.
+ The master mulitcasts messages (Datagrams) to the backup load balancers
+ in the following format.
+
+ Version 1:
+ Note, first byte should be Zero, so ver 0 receivers will drop the packet.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Count Conns | SyncID | Size |
+ | 0 | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | Version | Reserved, set to Zero |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (1) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
- | . |
+ ~ . ~
| . |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (n) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IPVS Sync Connection (1) |
*/
#define SYNC_MESG_HEADER_LEN 4
#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
+/* Version 0 header */
struct ip_vs_sync_mesg {
__u8 nr_conns;
__u8 syncid;
@@ -113,6 +232,17 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
+/* Version 1 header */
+struct ip_vs_sync_mesg_v2 {
+ __u8 reserved; /* must be zero */
+ __u8 syncid;
+ __u16 size;
+ __u8 nr_conns;
+ __s8 version; /* SYNC_PROTO_VER */
+ __u16 spare;
+ /* ip_vs_sync_conn entries start here */
+};
+
/* the maximum length of sync (sending/receiving) message */
static int sync_send_mesg_maxlen;
static int sync_recv_mesg_maxlen;
@@ -239,7 +369,7 @@ get_curr_sync_buff(unsigned long time)
void ip_vs_sync_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_sync_mesg *m;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_conn_v0 *s;
int len;
spin_lock(&curr_sb_lock);
@@ -254,7 +384,7 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp)
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn *)curr_sb->head;
+ s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
/* copy members */
s->protocol = cp->protocol;
@@ -306,7 +436,7 @@ ip_vs_conn_fill_param_sync(int af, int protocol,
static void ip_vs_process_message(char *buffer, const size_t buflen)
{
struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_conn_options *opt;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
@@ -343,7 +473,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
IP_VS_ERR_RL("bogus conn in sync message\n");
return;
}
- s = (struct ip_vs_sync_conn *) p;
+ s = (struct ip_vs_sync_conn_v0 *) p;
flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
flags &= ~IP_VS_CONN_F_HASHED;
if (flags & IP_VS_CONN_F_SEQ_MASK) {
@@ -849,7 +979,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
+ sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
if (sync_master_thread)
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 6/8] IPVS: Backup, Adding Version 1 receive capability
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (4 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 5/8] IPVS: Backup, Adding structs for new sync format Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 7/8] IPVS: Backup, Change sending to Version 1 format Simon Horman
` (2 subsequent siblings)
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Functionality improvements
* flags changed from 16 to 32 bits
* fwmark added (32 bits)
* timeout in sec. added (32 bits)
* pe data added (Variable length)
* IPv6 capabilities (3x16 bytes for addr.)
* Version and type in every conn msg.
ip_vs_process_message() now handles Version 1 messages
and will call ip_vs_process_message_v0() for version 0 messages.
ip_vs_proc_conn() is common for both version, and handles the update of
connection hash.
ip_vs_conn_fill_param_sync() - Version 1 messages only
ip_vs_conn_fill_param_sync_v0() - Version 0 messages only
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/linux/ip_vs.h | 8 +
include/net/ip_vs.h | 1 +
net/netfilter/ipvs/ip_vs_pe.c | 5 +-
net/netfilter/ipvs/ip_vs_sync.c | 549 ++++++++++++++++++++++++++++++---------
4 files changed, 440 insertions(+), 123 deletions(-)
diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h
index 5f43a3b..4deb383 100644
--- a/include/linux/ip_vs.h
+++ b/include/linux/ip_vs.h
@@ -89,6 +89,14 @@
#define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */
#define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */
+#define IP_VS_CONN_F_BACKUP_MASK (IP_VS_CONN_F_FWD_MASK | \
+ IP_VS_CONN_F_NOOUTPUT | \
+ IP_VS_CONN_F_INACTIVE | \
+ IP_VS_CONN_F_SEQ_MASK | \
+ IP_VS_CONN_F_NO_CPORT | \
+ IP_VS_CONN_F_TEMPLATE \
+ )
+
/* Flags that are not sent to backup server start from bit 16 */
#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 890f01c..4069484 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -817,6 +817,7 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc);
int register_ip_vs_pe(struct ip_vs_pe *pe);
int unregister_ip_vs_pe(struct ip_vs_pe *pe);
struct ip_vs_pe *ip_vs_pe_getbyname(const char *name);
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name);
static inline void ip_vs_pe_get(const struct ip_vs_pe *pe)
{
diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c
index e99f920..5cf859c 100644
--- a/net/netfilter/ipvs/ip_vs_pe.c
+++ b/net/netfilter/ipvs/ip_vs_pe.c
@@ -29,12 +29,11 @@ void ip_vs_unbind_pe(struct ip_vs_service *svc)
}
/* Get pe in the pe list by name */
-static struct ip_vs_pe *
-__ip_vs_pe_getbyname(const char *pe_name)
+struct ip_vs_pe *__ip_vs_pe_getbyname(const char *pe_name)
{
struct ip_vs_pe *pe;
- IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__,
+ IP_VS_DBG(10, "%s(): pe_name \"%s\"\n", __func__,
pe_name);
spin_lock_bh(&ip_vs_pe_lock);
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 566482f..e071508 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -35,6 +35,8 @@
#include <linux/wait.h>
#include <linux/kernel.h>
+#include <asm/unaligned.h> /* Used for ntoh_seq and hton_seq */
+
#include <net/ip.h>
#include <net/sock.h>
@@ -286,6 +288,16 @@ static struct sockaddr_in mcast_addr = {
.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP),
};
+/*
+ * Copy of struct ip_vs_seq
+ * From unaligned network order to aligned host order
+ */
+static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
+{
+ ho->init_seq = get_unaligned_be32(&no->init_seq);
+ ho->delta = get_unaligned_be32(&no->delta);
+ ho->previous_delta = get_unaligned_be32(&no->previous_delta);
+}
static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
@@ -418,59 +430,186 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp)
ip_vs_sync_conn(cp->control);
}
+/*
+ * fill_param used by version 1
+ */
static inline int
-ip_vs_conn_fill_param_sync(int af, int protocol,
- const union nf_inet_addr *caddr, __be16 cport,
- const union nf_inet_addr *vaddr, __be16 vport,
- struct ip_vs_conn_param *p)
+ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
+ struct ip_vs_conn_param *p,
+ __u8 *pe_data, unsigned int pe_data_len,
+ __u8 *pe_name, unsigned int pe_name_len)
{
- /* XXX: Need to take into account persistence engine */
- ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p);
+#ifdef CONFIG_IP_VS_IPV6
+ if (af == AF_INET6)
+ ip_vs_conn_fill_param(af, sc->v6.protocol,
+ (const union nf_inet_addr *)&sc->v6.caddr,
+ sc->v6.cport,
+ (const union nf_inet_addr *)&sc->v6.vaddr,
+ sc->v6.vport, p);
+ else
+#endif
+ ip_vs_conn_fill_param(af, sc->v4.protocol,
+ (const union nf_inet_addr *)&sc->v4.caddr,
+ sc->v4.cport,
+ (const union nf_inet_addr *)&sc->v4.vaddr,
+ sc->v4.vport, p);
+ /* Handle pe data */
+ if (pe_data_len) {
+ if (pe_name_len) {
+ char buff[IP_VS_PENAME_MAXLEN+1];
+
+ memcpy(buff, pe_name, pe_name_len);
+ buff[pe_name_len]=0;
+ p->pe = __ip_vs_pe_getbyname(buff);
+ if (!p->pe) {
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff);
+ return 1;
+ }
+ } else {
+ IP_VS_ERR_RL("BACKUP, Invalid PE parameters\n");
+ return 1;
+ }
+
+ p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
+ if (!p->pe_data) {
+ if (p->pe->module)
+ module_put(p->pe->module);
+ return -ENOMEM;
+ }
+ memcpy(p->pe_data, pe_data, pe_data_len);
+ p->pe_data_len = pe_data_len;
+ }
return 0;
}
/*
- * Process received multicast message and create the corresponding
- * ip_vs_conn entries.
+ * Connection Add / Update.
+ * Common for version 0 and 1 reception of backup sync_conns.
+ * Param: ...
+ * timeout is in sec.
+ */
+static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
+ unsigned state, unsigned protocol, unsigned type,
+ const union nf_inet_addr *daddr, __be16 dport,
+ unsigned long timeout, __u32 fwmark,
+ struct ip_vs_sync_conn_options *opt,
+ struct ip_vs_protocol *pp)
+{
+ struct ip_vs_dest *dest;
+ struct ip_vs_conn *cp;
+
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE))
+ cp = ip_vs_conn_in_get(param);
+ else
+ cp = ip_vs_ct_in_get(param);
+
+ if (cp && param->pe_data) /* Free pe_data */
+ kfree(param->pe_data);
+ if (!cp) {
+ /*
+ * Find the appropriate destination for the connection.
+ * If it is not found the connection will remain unbound
+ * but still handled.
+ */
+ dest = ip_vs_find_dest(type, daddr, dport, param->vaddr,
+ param->vport, protocol, fwmark);
+
+ /* Set the approprite ativity flag */
+ if (protocol == IPPROTO_TCP) {
+ if (state != IP_VS_TCP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ } else if (protocol == IPPROTO_SCTP) {
+ if (state != IP_VS_SCTP_S_ESTABLISHED)
+ flags |= IP_VS_CONN_F_INACTIVE;
+ else
+ flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ cp = ip_vs_conn_new(param, daddr, dport, flags, dest, fwmark);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ if (!cp) {
+ if (param->pe_data)
+ kfree(param->pe_data);
+ IP_VS_DBG(2, "BACKUP, add new conn. failed\n");
+ return;
+ }
+ } else if (!cp->dest) {
+ dest = ip_vs_try_bind_dest(cp);
+ if (dest)
+ atomic_dec(&dest->refcnt);
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
+ (cp->state != state)) {
+ /* update active/inactive flag for the connection */
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags |= IP_VS_CONN_F_INACTIVE;
+ } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state == IP_VS_TCP_S_ESTABLISHED)) {
+ atomic_inc(&dest->activeconns);
+ atomic_dec(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
+ (cp->state != state)) {
+ dest = cp->dest;
+ if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
+ (state != IP_VS_SCTP_S_ESTABLISHED)) {
+ atomic_dec(&dest->activeconns);
+ atomic_inc(&dest->inactconns);
+ cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ }
+ }
+
+ if (opt)
+ memcpy(&cp->in_seq, opt, sizeof(*opt));
+ atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ cp->state = state;
+ cp->old_state = cp->state;
+ /*
+ * For Ver 0 messages style
+ * - Not possible to recover the right timeout for templates
+ * - can not find the right fwmark
+ * virtual service. If needed, we can do it for
+ * non-fwmark persistent services.
+ * Ver 1 messages style.
+ * - No problem.
+ */
+ if (timeout) {
+ if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
+ timeout = MAX_SCHEDULE_TIMEOUT / HZ;
+ cp->timeout = timeout*HZ;
+ } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
+ cp->timeout = pp->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ ip_vs_conn_put(cp);
+}
+
+/*
+ * Process received multicast message for Version 0
*/
-static void ip_vs_process_message(char *buffer, const size_t buflen)
+static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
{
struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_conn_options *opt;
- struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
- struct ip_vs_dest *dest;
struct ip_vs_conn_param param;
char *p;
int i;
- if (buflen < sizeof(struct ip_vs_sync_mesg)) {
- IP_VS_ERR_RL("sync message header too short\n");
- return;
- }
-
- /* Convert size back to host byte order */
- m->size = ntohs(m->size);
-
- if (buflen != m->size) {
- IP_VS_ERR_RL("bogus sync message size\n");
- return;
- }
-
- /* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) {
- IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n",
- m->syncid);
- return;
- }
-
p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
for (i=0; i<m->nr_conns; i++) {
unsigned flags, state;
if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, bogus conn\n");
return;
}
s = (struct ip_vs_sync_conn_v0 *) p;
@@ -480,7 +619,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
opt = (struct ip_vs_sync_conn_options *)&s[1];
p += FULL_CONN_SIZE;
if (p > buffer+buflen) {
- IP_VS_ERR_RL("bogus conn options in sync message\n");
+ IP_VS_ERR_RL("BACKUP v0, Dropping buffer bogus conn options\n");
return;
}
} else {
@@ -492,12 +631,12 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
pp = ip_vs_proto_get(s->protocol);
if (!pp) {
- IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Unsupported protocol %u\n",
s->protocol);
continue;
}
if (state >= pp->num_states) {
- IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid %s state %u\n",
pp->name, state);
continue;
}
@@ -505,103 +644,273 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
/* protocol in templates is not used for state/timeout */
pp = NULL;
if (state > 0) {
- IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
+ IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol,
- (union nf_inet_addr *)&s->caddr,
- s->cport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport, ¶m)) {
- pr_err("ip_vs_conn_fill_param_sync failed");
- return;
+ ip_vs_conn_fill_param(AF_INET, s->protocol,
+ (const union nf_inet_addr *)&s->caddr,
+ s->cport,
+ (const union nf_inet_addr *)&s->vaddr,
+ s->vport, ¶m);
+
+ /* Send timeout as Zero */
+ ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET,
+ (union nf_inet_addr *)&s->daddr, s->dport,
+ 0, 0, opt, pp);
+ }
+}
+
+/*
+ * Handle options
+ */
+static inline int ip_vs_proc_seqopt(__u8 *p, unsigned int plen,
+ __u32 *opt_flags,
+ struct ip_vs_sync_conn_options *opt)
+{
+ struct ip_vs_sync_conn_options *topt;
+
+ topt = (struct ip_vs_sync_conn_options *)p;
+
+ if (plen != sizeof(struct ip_vs_sync_conn_options)) {
+ IP_VS_DBG(2, "BACKUP, bogus conn options length\n");
+ return -EINVAL;
+ }
+ if (*opt_flags & IPVS_OPT_F_SEQ_DATA) {
+ IP_VS_DBG(2, "BACKUP, conn options found twice\n");
+ return -EINVAL;
+ }
+ ntoh_seq(&topt->in_seq, &opt->in_seq);
+ ntoh_seq(&topt->out_seq, &opt->out_seq);
+ *opt_flags |= IPVS_OPT_F_SEQ_DATA;
+ return 0;
+}
+
+static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
+ __u8 **data, unsigned int maxlen,
+ __u32 *opt_flags, __u32 flag)
+{
+ if (plen > maxlen) {
+ IP_VS_DBG(2, "BACKUP, bogus par.data len > %d\n", maxlen);
+ return -EINVAL;
+ }
+ if (*opt_flags & flag) {
+ IP_VS_DBG(2, "BACKUP, Par.data found twice 0x%x\n", flag);
+ return -EINVAL;
+ }
+ *data_len = plen;
+ *data = p;
+ *opt_flags |= flag;
+ return 0;
+}
+/*
+ * Process a Version 1 sync. connection
+ */
+static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
+{
+ struct ip_vs_sync_conn_options opt;
+ union ip_vs_sync_conn *s;
+ struct ip_vs_protocol *pp;
+ struct ip_vs_conn_param param;
+ __u32 flags;
+ unsigned int af, state, pe_data_len=0, pe_name_len=0;
+ __u8 *pe_data=NULL, *pe_name=NULL;
+ __u32 opt_flags=0;
+ int retc=0;
+
+ s = (union ip_vs_sync_conn *) p;
+
+ if (s->v6.type & STYPE_F_INET6) {
+#ifdef CONFIG_IP_VS_IPV6
+ af = AF_INET6;
+ p += sizeof(struct ip_vs_sync_v6);
+#else
+ IP_VS_DBG(3,"BACKUP, IPv6 msg received, and IPVS is not compiled for IPv6\n");
+ retc = 10;
+ goto out;
+#endif
+ } else if (!s->v4.type) {
+ af = AF_INET;
+ p += sizeof(struct ip_vs_sync_v4);
+ } else {
+ return -10;
+ }
+ if (p > msg_end)
+ return -20;
+
+ /* Process optional params check Type & Len. */
+ while (p < msg_end) {
+ int ptype;
+ int plen;
+
+ if (p+2 > msg_end)
+ return -30;
+ ptype = *(p++);
+ plen = *(p++);
+
+ if (!plen || ((p + plen) > msg_end))
+ return -40;
+ /* Handle seq option p = param data */
+ switch (ptype & ~IPVS_OPT_F_PARAM) {
+ case IPVS_OPT_SEQ_DATA:
+ if (ip_vs_proc_seqopt(p, plen, &opt_flags, &opt))
+ return -50;
+ break;
+
+ case IPVS_OPT_PE_DATA:
+ if (ip_vs_proc_str(p, plen, &pe_data_len, &pe_data,
+ IP_VS_PEDATA_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_DATA))
+ return -60;
+ break;
+
+ case IPVS_OPT_PE_NAME:
+ if (ip_vs_proc_str(p, plen,&pe_name_len, &pe_name,
+ IP_VS_PENAME_MAXLEN, &opt_flags,
+ IPVS_OPT_F_PE_NAME))
+ return -70;
+ break;
+
+ default:
+ /* Param data mandatory ? */
+ if (!(ptype & IPVS_OPT_F_PARAM)) {
+ IP_VS_DBG(3, "BACKUP, Unknown mandatory param %d found\n",
+ ptype & ~IPVS_OPT_F_PARAM);
+ retc = 20;
+ goto out;
+ }
}
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
- cp = ip_vs_conn_in_get(¶m);
- else
- cp = ip_vs_ct_in_get(¶m);
- if (!cp) {
- /*
- * Find the appropriate destination for the connection.
- * If it is not found the connection will remain unbound
- * but still handled.
- */
- dest = ip_vs_find_dest(AF_INET,
- (union nf_inet_addr *)&s->daddr,
- s->dport,
- (union nf_inet_addr *)&s->vaddr,
- s->vport,
- s->protocol, 0);
- /* Set the approprite ativity flag */
- if (s->protocol == IPPROTO_TCP) {
- if (state != IP_VS_TCP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
- } else if (s->protocol == IPPROTO_SCTP) {
- if (state != IP_VS_SCTP_S_ESTABLISHED)
- flags |= IP_VS_CONN_F_INACTIVE;
- else
- flags &= ~IP_VS_CONN_F_INACTIVE;
+ p += plen; /* Next option */
+ }
+
+ /* Get flags and Mask off unsupported */
+ flags = ntohl(s->v4.flags) & IP_VS_CONN_F_BACKUP_MASK;
+ flags |= IP_VS_CONN_F_SYNC;
+ state = ntohs(s->v4.state);
+
+ if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
+ pp = ip_vs_proto_get(s->v4.protocol);
+ if (!pp) {
+ IP_VS_DBG(3,"BACKUP, Unsupported protocol %u\n",
+ s->v4.protocol);
+ retc = 30;
+ goto out;
+ }
+ if (state >= pp->num_states) {
+ IP_VS_DBG(3, "BACKUP, Invalid %s state %u\n",
+ pp->name, state);
+ retc = 40;
+ goto out;
+ }
+ } else {
+ /* protocol in templates is not used for state/timeout */
+ pp = NULL;
+ if (state > 0) {
+ IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
+ state);
+ state = 0;
+ }
+ }
+ if (ip_vs_conn_fill_param_sync(af, s, ¶m,
+ pe_data, pe_data_len,
+ pe_name, pe_name_len)) {
+ retc = 50;
+ goto out;
+ }
+ /* If only IPv4, just silent skip IPv6 */
+ if (af == AF_INET)
+ ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af,
+ (union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
+ ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+ pp);
+#ifdef CONFIG_IP_VS_IPV6
+ else
+ ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af,
+ (union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
+ ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
+ pp);
+#endif
+ return 0;
+ /* Error exit */
+out:
+ IP_VS_DBG(2, "BACKUP, Single msg dropped err:%d\n", retc);
+ return retc;
+
+}
+/*
+ * Process received multicast message and create the corresponding
+ * ip_vs_conn entries.
+ * Handles Version 0 & 1
+ */
+static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
+{
+ struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer;
+ __u8 *p, *msg_end;
+ unsigned int i, nr_conns;
+
+ if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+ IP_VS_DBG(2, "BACKUP, message header too short\n");
+ return;
+ }
+ /* Convert size back to host byte order */
+ m2->size = ntohs(m2->size);
+
+ if (buflen != m2->size) {
+ IP_VS_DBG(2, "BACKUP, bogus message size\n");
+ return;
+ }
+ /* SyncID sanity check */
+ if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) {
+ IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
+ return;
+ }
+ /* Handle version 1 message */
+ if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
+ && (m2->spare == 0)) {
+
+ msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2);
+ nr_conns = m2->nr_conns;
+
+ for (i=0; i<nr_conns; i++) {
+ union ip_vs_sync_conn *s;
+ unsigned size;
+ int retc;
+
+ p = msg_end;
+ if (p + sizeof(s->v4) > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, to small\n");
+ return;
}
- cp = ip_vs_conn_new(¶m,
- (union nf_inet_addr *)&s->daddr,
- s->dport, flags, dest, 0);
- if (dest)
- atomic_dec(&dest->refcnt);
- if (!cp) {
- pr_err("ip_vs_conn_new failed\n");
+ s = (union ip_vs_sync_conn *)p;
+ size = ntohs(s->v4.ver_size) & SVER_MASK;
+ msg_end = p + size;
+ /* Basic sanity checks */
+ if (msg_end > buffer+buflen) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, msg > buffer\n");
return;
}
- } else if (!cp->dest) {
- dest = ip_vs_try_bind_dest(cp);
- if (dest)
- atomic_dec(&dest->refcnt);
- } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
- (cp->state != state)) {
- /* update active/inactive flag for the connection */
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_TCP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags |= IP_VS_CONN_F_INACTIVE;
- } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state == IP_VS_TCP_S_ESTABLISHED)) {
- atomic_inc(&dest->activeconns);
- atomic_dec(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ if (ntohs(s->v4.ver_size) >> SVER_SHIFT) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Unknown version %d\n",
+ ntohs(s->v4.ver_size) >> SVER_SHIFT);
+ return;
}
- } else if ((cp->dest) && (cp->protocol == IPPROTO_SCTP) &&
- (cp->state != state)) {
- dest = cp->dest;
- if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (state != IP_VS_SCTP_S_ESTABLISHED)) {
- atomic_dec(&dest->activeconns);
- atomic_inc(&dest->inactconns);
- cp->flags &= ~IP_VS_CONN_F_INACTIVE;
+ /* Process a single sync_conn */
+ if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) {
+ IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
+ retc);
+ return;
}
+ /* Make sure we have 32 bit alignment */
+ msg_end = p + ((size + 3) & ~3);
}
-
- if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
- cp->state = state;
- cp->old_state = cp->state;
- /*
- * We can not recover the right timeout for templates
- * in all cases, we can not find the right fwmark
- * virtual service. If needed, we can do it for
- * non-fwmark persistent services.
- */
- if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
- else
- cp->timeout = (3*60*HZ);
- ip_vs_conn_put(cp);
+ } else {
+ /* Old type of message */
+ ip_vs_process_message_v0(buffer, buflen);
+ return;
}
}
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 7/8] IPVS: Backup, Change sending to Version 1 format
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (5 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 6/8] IPVS: Backup, Adding Version 1 receive capability Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 1:46 ` [PATCH 8/8] IPVS: Backup, adding version 0 sending capabilities Simon Horman
2010-11-25 13:03 ` [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Patrick McHardy
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Enable sending and removal of version 0 sending
Affected functions,
ip_vs_sync_buff_create()
ip_vs_sync_conn()
ip_vs_core.c removal of IPv4 check.
*v5
Just check cp->pe_data_len in ip_vs_sync_conn
Check if padding needed before adding a new sync_conn
to the buffer, i.e. avoid sending padding at the end.
*v4
moved sanity check and pe_name_len after sloop.
use cp->pe instead of cp->dest->svc->pe
real length in each sync_conn, not padded length
however total size of a sync_msg includes padding.
*v3
Sending ip_vs_sync_conn_options in network order.
Sending Templates for ONE_PACKET conn.
Renaming of ip_vs_sync_mesg to ip_vs_sync_mesg_v0
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +-
net/netfilter/ipvs/ip_vs_core.c | 13 ++-
net/netfilter/ipvs/ip_vs_sync.c | 189 ++++++++++++++++++++++++++++++---------
3 files changed, 156 insertions(+), 48 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 4069484..a715f3d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -919,7 +919,7 @@ extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(const struct ip_vs_conn *cp);
+extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
/*
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 3445da6..5287771 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1560,9 +1560,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*
* Sync connection if it is about to close to
* encorage the standby servers to update the connections timeout
+ *
+ * For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
- pkts = atomic_add_return(1, &cp->in_pkts);
- if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ pkts = sysctl_ip_vs_sync_threshold[0];
+ else
+ pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1577,8 +1583,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if (af == AF_INET &&
- (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index e071508..df5abf0 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -226,7 +226,7 @@ struct ip_vs_sync_thread_data {
#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
/* Version 0 header */
-struct ip_vs_sync_mesg {
+struct ip_vs_sync_mesg_v0 {
__u8 nr_conns;
__u8 syncid;
__u16 size;
@@ -235,7 +235,7 @@ struct ip_vs_sync_mesg {
};
/* Version 1 header */
-struct ip_vs_sync_mesg_v2 {
+struct ip_vs_sync_mesg {
__u8 reserved; /* must be zero */
__u8 syncid;
__u16 size;
@@ -299,6 +299,17 @@ static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
ho->previous_delta = get_unaligned_be32(&no->previous_delta);
}
+/*
+ * Copy of struct ip_vs_seq
+ * From Aligned host order to unaligned network order
+ */
+static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
+{
+ put_unaligned_be32(ho->init_seq, &no->init_seq);
+ put_unaligned_be32(ho->delta, &no->delta);
+ put_unaligned_be32(ho->previous_delta, &no->previous_delta);
+}
+
static inline struct ip_vs_sync_buff *sb_dequeue(void)
{
struct ip_vs_sync_buff *sb;
@@ -317,6 +328,9 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
return sb;
}
+/*
+ * Create a new sync buffer for Version 1 proto.
+ */
static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
{
struct ip_vs_sync_buff *sb;
@@ -328,11 +342,15 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
kfree(sb);
return NULL;
}
- sb->mesg->nr_conns = 0;
+ sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
+ sb->mesg->version = SYNC_PROTO_VER;
sb->mesg->syncid = ip_vs_master_syncid;
- sb->mesg->size = 4;
- sb->head = (unsigned char *)sb->mesg + 4;
+ sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
+ sb->mesg->nr_conns = 0;
+ sb->mesg->spare = 0;
+ sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+
sb->firstuse = jiffies;
return sb;
}
@@ -373,18 +391,60 @@ get_curr_sync_buff(unsigned long time)
return sb;
}
-
/*
* Add an ip_vs_conn information into the current sync_buff.
* Called by ip_vs_in.
+ * Sending Version 1 messages
*/
-void ip_vs_sync_conn(const struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct ip_vs_conn *cp)
{
struct ip_vs_sync_mesg *m;
- struct ip_vs_sync_conn_v0 *s;
- int len;
+ union ip_vs_sync_conn *s;
+ __u8 *p;
+ unsigned int len, pe_name_len, pad;
+
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ goto control;
+sloop:
+ /* Sanity checks */
+ pe_name_len = 0;
+ if (cp->pe_data_len) {
+ if (!cp->pe_data || !cp->dest) {
+ IP_VS_ERR_RL("SYNC, connection pe_data invalid\n");
+ return;
+ }
+ pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
+ }
spin_lock(&curr_sb_lock);
+
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6)
+ len = sizeof(struct ip_vs_sync_v6);
+ else
+#endif
+ len = sizeof(struct ip_vs_sync_v4);
+
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK)
+ len += sizeof(struct ip_vs_sync_conn_options) + 2;
+
+ if (cp->pe_data_len)
+ len += cp->pe_data_len + 2; /* + Param hdr field */
+ if (pe_name_len)
+ len += pe_name_len + 2;
+
+ /* check if there is a space for this one */
+ pad = 0;
+ if (curr_sb) {
+ pad = (4 - (size_t)curr_sb->head) & 3;
+ if (curr_sb->head + len + pad > curr_sb->end) {
+ sb_queue_tail(curr_sb);
+ curr_sb = NULL;
+ pad = 0;
+ }
+ }
+
if (!curr_sb) {
if (!(curr_sb=ip_vs_sync_buff_create())) {
spin_unlock(&curr_sb_lock);
@@ -393,41 +453,84 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp)
}
}
- len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
- SIMPLE_CONN_SIZE;
m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
-
- /* copy members */
- s->protocol = cp->protocol;
- s->cport = cp->cport;
- s->vport = cp->vport;
- s->dport = cp->dport;
- s->caddr = cp->caddr.ip;
- s->vaddr = cp->vaddr.ip;
- s->daddr = cp->daddr.ip;
- s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
- s->state = htons(cp->state);
- if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
- struct ip_vs_sync_conn_options *opt =
- (struct ip_vs_sync_conn_options *)&s[1];
- memcpy(opt, &cp->in_seq, sizeof(*opt));
- }
-
+ p = curr_sb->head;
+ curr_sb->head += pad + len;
+ m->size += pad + len;
+ /* Add ev. padding from prev. sync_conn */
+ while (pad--)
+ *(p++) = 0;
+
+ s = (union ip_vs_sync_conn *)p;
+
+ /* Set message type & copy members */
+ s->v4.type = (cp->af == AF_INET6 ? STYPE_F_INET6 : 0);
+ s->v4.ver_size = htons(len & SVER_MASK); /* Version 0 */
+ s->v4.flags = htonl(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->v4.state = htons(cp->state);
+ s->v4.protocol = cp->protocol;
+ s->v4.cport = cp->cport;
+ s->v4.vport = cp->vport;
+ s->v4.dport = cp->dport;
+ s->v4.fwmark = htonl(cp->fwmark);
+ s->v4.timeout = htonl(cp->timeout / HZ);
m->nr_conns++;
- m->size += len;
- curr_sb->head += len;
- /* check if there is a space for next one */
- if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+#ifdef CONFIG_IP_VS_IPV6
+ if (cp->af == AF_INET6) {
+ p += sizeof(struct ip_vs_sync_v6);
+ ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
+ ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
+ ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+ } else
+#endif
+ {
+ p += sizeof(struct ip_vs_sync_v4); /* options ptr */
+ s->v4.caddr = cp->caddr.ip;
+ s->v4.vaddr = cp->vaddr.ip;
+ s->v4.daddr = cp->daddr.ip;
+ }
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ *(p++) = IPVS_OPT_SEQ_DATA;
+ *(p++) = sizeof(struct ip_vs_sync_conn_options);
+ hton_seq((struct ip_vs_seq *)p, &cp->in_seq);
+ p += sizeof(struct ip_vs_seq);
+ hton_seq((struct ip_vs_seq *)p, &cp->out_seq);
+ p += sizeof(struct ip_vs_seq);
}
+ /* Handle pe data */
+ if (cp->pe_data_len && cp->pe_data) {
+ *(p++) = IPVS_OPT_PE_DATA;
+ *(p++) = cp->pe_data_len;
+ memcpy(p, cp->pe_data, cp->pe_data_len);
+ p += cp->pe_data_len;
+ if (pe_name_len) {
+ /* Add PE_NAME */
+ *(p++) = IPVS_OPT_PE_NAME;
+ *(p++) = pe_name_len;
+ memcpy(p, cp->pe->name, pe_name_len);
+ p += pe_name_len;
+ }
+ }
+
spin_unlock(&curr_sb_lock);
+control:
/* synchronize its controller if it has */
- if (cp->control)
- ip_vs_sync_conn(cp->control);
+ cp = cp->control;
+ if (!cp)
+ return;
+ /*
+ * Reduce sync rate for templates
+ * i.e only increment in_pkts for Templates.
+ */
+ if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
+ int pkts = atomic_add_return(1, &cp->in_pkts);
+
+ if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
+ return;
+ }
+ goto sloop;
}
/*
@@ -596,7 +699,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
*/
static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
{
- struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
+ struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_conn_options *opt;
struct ip_vs_protocol *pp;
@@ -604,7 +707,7 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
char *p;
int i;
- p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
+ p = (char *)buffer + sizeof(struct ip_vs_sync_mesg_v0);
for (i=0; i<m->nr_conns; i++) {
unsigned flags, state;
@@ -848,11 +951,11 @@ out:
*/
static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
{
- struct ip_vs_sync_mesg_v2 *m2 = (struct ip_vs_sync_mesg_v2 *)buffer;
+ struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
- unsigned int i, nr_conns;
+ int i, nr_conns;
- if (buflen < sizeof(struct ip_vs_sync_mesg)) {
+ if (buflen < sizeof(struct ip_vs_sync_mesg_v0)) {
IP_VS_DBG(2, "BACKUP, message header too short\n");
return;
}
@@ -872,7 +975,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
if ((m2->version == SYNC_PROTO_VER) && (m2->reserved == 0)
&& (m2->spare == 0)) {
- msg_end = buffer + sizeof(struct ip_vs_sync_mesg_v2);
+ msg_end = buffer + sizeof(struct ip_vs_sync_mesg);
nr_conns = m2->nr_conns;
for (i=0; i<nr_conns; i++) {
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 8/8] IPVS: Backup, adding version 0 sending capabilities
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (6 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 7/8] IPVS: Backup, Change sending to Version 1 format Simon Horman
@ 2010-11-25 1:46 ` Simon Horman
2010-11-25 13:03 ` [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Patrick McHardy
8 siblings, 0 replies; 10+ messages in thread
From: Simon Horman @ 2010-11-25 1:46 UTC (permalink / raw)
To: lvs-devel, netdev, netfilter, netfilter-devel
Cc: Hans Schillstrom, Julian Anastasov, Patrick McHardy, Simon Horman
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
This patch adds a sysclt net.ipv4.vs.sync_version
that can be used to send sync msg in version 0 or 1 format.
sync_version value is logical,
Value 1 (default) New version
0 Plain old version
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +
net/netfilter/ipvs/ip_vs_ctl.c | 28 ++++++++-
net/netfilter/ipvs/ip_vs_sync.c | 134 +++++++++++++++++++++++++++++++++++++++
3 files changed, 163 insertions(+), 1 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index a715f3d..d858264 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -883,7 +883,9 @@ extern int sysctl_ip_vs_conntrack;
extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
+extern int sysctl_ip_vs_sync_ver;
+extern void ip_vs_sync_switch_mode(int mode);
extern struct ip_vs_service *
ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index a5bd002..d12a13c 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -92,7 +92,7 @@ int sysctl_ip_vs_nat_icmp_send = 0;
int sysctl_ip_vs_conntrack;
#endif
int sysctl_ip_vs_snat_reroute = 1;
-
+int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -1536,6 +1536,25 @@ proc_do_sync_threshold(ctl_table *table, int write,
return rc;
}
+static int
+proc_do_sync_mode(ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ int *valp = table->data;
+ int val = *valp;
+ int rc;
+
+ rc = proc_dointvec(table, write, buffer, lenp, ppos);
+ if (write && (*valp != val)) {
+ if ((*valp < 0) || (*valp > 1)) {
+ /* Restore the correct value */
+ *valp = val;
+ } else {
+ ip_vs_sync_switch_mode(val);
+ }
+ }
+ return rc;
+}
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
@@ -1602,6 +1621,13 @@ static struct ctl_table vs_vars[] = {
.mode = 0644,
.proc_handler = &proc_dointvec,
},
+ {
+ .procname = "sync_version",
+ .data = &sysctl_ip_vs_sync_ver,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_do_sync_mode,
+ },
#if 0
{
.procname = "timeout_established",
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index df5abf0..c1c167a 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -5,6 +5,18 @@
* high-performance and highly available server based on a
* cluster of servers.
*
+ * Version 1, is capable of handling both version 0 and 1 messages.
+ * Version 0 is the plain old format.
+ * Note Version 0 receivers will just drop Ver 1 messages.
+ * Version 1 is capable of handle IPv6, Persistence data,
+ * time-outs, and firewall marks.
+ * In ver.1 "ip_vs_sync_conn_options" will be sent in netw. order.
+ * Ver. 0 can be turned on by sysctl -w net.ipv4.vs.sync_version=0
+ *
+ * Definitions Message: is a complete datagram
+ * Sync_conn: is a part of a Message
+ * Param Data is an option to a Sync_conn.
+ *
* Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
*
* ip_vs_sync: sync connection info from master load balancer to backups
@@ -15,6 +27,8 @@
* Alexandre Cassen : Added SyncID support for incoming sync
* messages filtering.
* Justin Ossevoort : Fix endian problem on sync message size.
+ * Hans Schillstrom : Added Version 1: i.e. IPv6,
+ * Persistence support, fwmark and time-out.
*/
#define KMSG_COMPONENT "IPVS"
@@ -392,6 +406,121 @@ get_curr_sync_buff(unsigned long time)
}
/*
+ * Switch mode from sending version 0 or 1
+ * - must handle sync_buf
+ */
+void ip_vs_sync_switch_mode(int mode) {
+
+ if (!ip_vs_sync_state & IP_VS_STATE_MASTER)
+ return;
+ if (mode == sysctl_ip_vs_sync_ver || !curr_sb)
+ return;
+
+ spin_lock_bh(&curr_sb_lock);
+ /* Buffer empty ? then let buf_create do the job */
+ if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+ kfree(curr_sb);
+ curr_sb = NULL;
+ } else {
+ spin_lock_bh(&ip_vs_sync_lock);
+ if (ip_vs_sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&curr_sb->list, &ip_vs_sync_queue);
+ else
+ ip_vs_sync_buff_release(curr_sb);
+ spin_unlock_bh(&ip_vs_sync_lock);
+ }
+ spin_unlock_bh(&curr_sb_lock);
+}
+
+/*
+ * Create a new sync buffer for Version 0 proto.
+ */
+static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
+{
+ struct ip_vs_sync_buff *sb;
+ struct ip_vs_sync_mesg_v0 *mesg;
+
+ if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
+ return NULL;
+
+ if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ kfree(sb);
+ return NULL;
+ }
+ mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
+ mesg->nr_conns = 0;
+ mesg->syncid = ip_vs_master_syncid;
+ mesg->size = 4;
+ sb->head = (unsigned char *)mesg + 4;
+ sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen;
+ sb->firstuse = jiffies;
+ return sb;
+}
+
+/*
+ * Version 0 , could be switched in by sys_ctl.
+ * Add an ip_vs_conn information into the current sync_buff.
+ */
+void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
+{
+ struct ip_vs_sync_mesg_v0 *m;
+ struct ip_vs_sync_conn_v0 *s;
+ int len;
+
+ if (unlikely(cp->af != AF_INET))
+ return;
+ /* Do not sync ONE PACKET */
+ if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
+ return;
+
+ spin_lock(&curr_sb_lock);
+ if (!curr_sb) {
+ if (!(curr_sb=ip_vs_sync_buff_create_v0())) {
+ spin_unlock(&curr_sb_lock);
+ pr_err("ip_vs_sync_buff_create failed.\n");
+ return;
+ }
+ }
+
+ len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
+ SIMPLE_CONN_SIZE;
+ m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg;
+ s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
+
+ /* copy members */
+ s->reserved = 0;
+ s->protocol = cp->protocol;
+ s->cport = cp->cport;
+ s->vport = cp->vport;
+ s->dport = cp->dport;
+ s->caddr = cp->caddr.ip;
+ s->vaddr = cp->vaddr.ip;
+ s->daddr = cp->daddr.ip;
+ s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED);
+ s->state = htons(cp->state);
+ if (cp->flags & IP_VS_CONN_F_SEQ_MASK) {
+ struct ip_vs_sync_conn_options *opt =
+ (struct ip_vs_sync_conn_options *)&s[1];
+ memcpy(opt, &cp->in_seq, sizeof(*opt));
+ }
+
+ m->nr_conns++;
+ m->size += len;
+ curr_sb->head += len;
+
+ /* check if there is a space for next one */
+ if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) {
+ sb_queue_tail(curr_sb);
+ curr_sb = NULL;
+ }
+ spin_unlock(&curr_sb_lock);
+
+ /* synchronize its controller if it has */
+ if (cp->control)
+ ip_vs_sync_conn(cp->control);
+}
+
+/*
* Add an ip_vs_conn information into the current sync_buff.
* Called by ip_vs_in.
* Sending Version 1 messages
@@ -403,6 +532,11 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
__u8 *p;
unsigned int len, pe_name_len, pad;
+ /* Handle old version of the protocol */
+ if (sysctl_ip_vs_sync_ver == 0) {
+ ip_vs_sync_conn_v0(cp);
+ return;
+ }
/* Do not sync ONE PACKET */
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
goto control;
--
1.7.2.3
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 0/8] ipvs: ipvs update for nf-next-2.6
2010-11-25 1:46 [PATCH 0/8] ipvs: ipvs update for nf-next-2.6 Simon Horman
` (7 preceding siblings ...)
2010-11-25 1:46 ` [PATCH 8/8] IPVS: Backup, adding version 0 sending capabilities Simon Horman
@ 2010-11-25 13:03 ` Patrick McHardy
8 siblings, 0 replies; 10+ messages in thread
From: Patrick McHardy @ 2010-11-25 13:03 UTC (permalink / raw)
To: Simon Horman
Cc: lvs-devel, netdev, netfilter, netfilter-devel, Hans Schillstrom,
Julian Anastasov
Am 25.11.2010 02:46, schrieb Simon Horman:
> git://git.kernel.org/pub/scm/linux/kernel/git/horms/lvs-test-2.6.git master
Pulled, thanks Simon.
^ permalink raw reply [flat|nested] 10+ messages in thread