* [PATCH 74/79] netfilter: reduce NF_VERDICT_MASK to 0xff
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
NF_VERDICT_MASK is currently 0xffff. This is because the upper
16 bits are used to store errno (for NF_DROP) or the queue number
(NF_QUEUE verdict).
As there are up to 0xffff different queues available, there is no more
room to store additional flags.
At the moment there are only 6 different verdicts, i.e. we can reduce
NF_VERDICT_MASK to 0xff to allow storing additional flags in the 0xff00 space.
NF_VERDICT_BITS would then be reduced to 8, but because the value is
exported to userspace, this might cause breakage; e.g.:
e.g. 'queuenr = (1 << NF_VERDICT_BITS) | NF_QUEUE' would now break.
Thus, remove NF_VERDICT_BITS usage in the kernel and move the old value
to the 'userspace compat' section.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/netfilter.h | 20 +++++++++++++++-----
net/netfilter/core.c | 4 ++--
net/netfilter/nf_queue.c | 2 +-
3 files changed, 18 insertions(+), 8 deletions(-)
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 0ab7ca7..78b73cc 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -24,16 +24,19 @@
#define NF_MAX_VERDICT NF_STOP
/* we overload the higher bits for encoding auxiliary data such as the queue
- * number. Not nice, but better than additional function arguments. */
-#define NF_VERDICT_MASK 0x0000ffff
-#define NF_VERDICT_BITS 16
+ * number or errno values. Not nice, but better than additional function
+ * arguments. */
+#define NF_VERDICT_MASK 0x000000ff
+
+/* extra verdict flags have mask 0x0000ff00 */
+/* queue number (NF_QUEUE) or errno (NF_DROP) */
#define NF_VERDICT_QMASK 0xffff0000
#define NF_VERDICT_QBITS 16
-#define NF_QUEUE_NR(x) ((((x) << NF_VERDICT_BITS) & NF_VERDICT_QMASK) | NF_QUEUE)
+#define NF_QUEUE_NR(x) ((((x) << 16) & NF_VERDICT_QMASK) | NF_QUEUE)
-#define NF_DROP_ERR(x) (((-x) << NF_VERDICT_BITS) | NF_DROP)
+#define NF_DROP_ERR(x) (((-x) << 16) | NF_DROP)
/* only for userspace compatibility */
#ifndef __KERNEL__
@@ -41,6 +44,9 @@
<= 0x2000 is used for protocol-flags. */
#define NFC_UNKNOWN 0x4000
#define NFC_ALTERED 0x8000
+
+/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
+#define NF_VERDICT_BITS 16
#endif
enum nf_inet_hooks {
@@ -72,6 +78,10 @@ union nf_inet_addr {
#ifdef __KERNEL__
#ifdef CONFIG_NETFILTER
+static inline int NF_DROP_GETERR(int verdict)
+{
+ return -(verdict >> NF_VERDICT_QBITS);
+}
static inline int nf_inet_addr_cmp(const union nf_inet_addr *a1,
const union nf_inet_addr *a2)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 0c5b796..4d88e45 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -175,12 +175,12 @@ next_hook:
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
kfree_skb(skb);
- ret = -(verdict >> NF_VERDICT_BITS);
+ ret = NF_DROP_GETERR(verdict);
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
- verdict >> NF_VERDICT_BITS);
+ verdict >> NF_VERDICT_QBITS);
if (ret < 0) {
if (ret == -ECANCELED)
goto next_hook;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5c4b730..ce1150d4a 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -299,7 +299,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
case NF_QUEUE:
err = __nf_queue(skb, elem, entry->pf, entry->hook,
entry->indev, entry->outdev, entry->okfn,
- verdict >> NF_VERDICT_BITS);
+ verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ECANCELED)
goto next_hook;
--
1.7.2.3
^ permalink raw reply related
* [PATCH 73/79] netfilter: nfnetlink_queue: do not free skb on error
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
Move free responsibility from nf_queue to caller.
This enables more flexible error handling; we can now accept the skb
instead of freeing it.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/core.c | 7 +++++--
net/netfilter/nf_queue.c | 17 ++++++++++-------
2 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 91d66d2f..0c5b796 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -181,8 +181,11 @@ next_hook:
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
verdict >> NF_VERDICT_BITS);
- if (ret == -ECANCELED)
- goto next_hook;
+ if (ret < 0) {
+ if (ret == -ECANCELED)
+ goto next_hook;
+ kfree_skb(skb);
+ }
ret = 0;
}
rcu_read_unlock();
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index ad25c7e..5c4b730 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -163,9 +163,8 @@ static int __nf_queue(struct sk_buff *skb,
/* If it's going away, ignore hook. */
if (!try_module_get(entry->elem->owner)) {
- rcu_read_unlock();
- kfree(entry);
- return -ECANCELED;
+ status = -ECANCELED;
+ goto err_unlock;
}
/* Bump dev refs so they don't vanish while packet is out */
if (indev)
@@ -198,7 +197,6 @@ static int __nf_queue(struct sk_buff *skb,
err_unlock:
rcu_read_unlock();
err:
- kfree_skb(skb);
kfree(entry);
return status;
}
@@ -229,7 +227,6 @@ int nf_queue(struct sk_buff *skb,
}
segs = skb_gso_segment(skb, 0);
- kfree_skb(skb);
/* Does not use PTR_ERR to limit the number of error codes that can be
* returned by nf_queue. For instance, callers rely on -ECANCELED to mean
* 'ignore this hook'.
@@ -253,8 +250,11 @@ int nf_queue(struct sk_buff *skb,
segs = nskb;
} while (segs);
+ /* also free orig skb if only some segments were queued */
if (unlikely(err && queued))
err = 0;
+ if (err == 0)
+ kfree_skb(skb);
return err;
}
@@ -300,8 +300,11 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
err = __nf_queue(skb, elem, entry->pf, entry->hook,
entry->indev, entry->outdev, entry->okfn,
verdict >> NF_VERDICT_BITS);
- if (err == -ECANCELED)
- goto next_hook;
+ if (err < 0) {
+ if (err == -ECANCELED)
+ goto next_hook;
+ kfree_skb(skb);
+ }
break;
case NF_STOLEN:
default:
--
1.7.2.3
^ permalink raw reply related
* [PATCH 71/79] netfilter: Kconfig: NFQUEUE is useless without NETFILTER_NETLINK_QUEUE
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Florian Westphal <fw@strlen.de>
NFLOG already does the same thing for NETFILTER_NETLINK_LOG.
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
net/netfilter/Kconfig | 1 +
1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 93918f0..e2480bd 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -487,6 +487,7 @@ config NETFILTER_XT_TARGET_NFLOG
config NETFILTER_XT_TARGET_NFQUEUE
tristate '"NFQUEUE" target Support'
depends on NETFILTER_ADVANCED
+ select NETFILTER_NETLINK_QUEUE
help
This target replaced the old obsolete QUEUE target.
--
1.7.2.3
^ permalink raw reply related
* [PATCH 68/79] netfilter: xtables: add missing header files to export list
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
include/linux/netfilter/Kbuild | 2 ++
1 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 9f11fbc..fc4e0aa 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -56,6 +56,8 @@ header-y += xt_rateest.h
header-y += xt_realm.h
header-y += xt_recent.h
header-y += xt_sctp.h
+header-y += xt_secmark.h
+header-y += xt_socket.h
header-y += xt_state.h
header-y += xt_statistic.h
header-y += xt_string.h
--
1.7.2.3
^ permalink raw reply related
* [PATCH 67/79] netfilter: xtables: use __uXX guarded types for userspace exports
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
include/linux/netfilter_bridge/ebt_802_3.h | 24 ++++++++++++------------
include/linux/netfilter_bridge/ebt_among.h | 2 +-
include/linux/netfilter_bridge/ebt_arp.h | 4 ++--
include/linux/netfilter_bridge/ebt_ip.h | 12 ++++++------
include/linux/netfilter_bridge/ebt_ip6.h | 16 ++++++++--------
include/linux/netfilter_bridge/ebt_limit.h | 8 ++++----
include/linux/netfilter_bridge/ebt_log.h | 6 +++---
include/linux/netfilter_bridge/ebt_mark_m.h | 4 ++--
include/linux/netfilter_bridge/ebt_nflog.h | 10 +++++-----
include/linux/netfilter_bridge/ebt_pkttype.h | 4 ++--
include/linux/netfilter_bridge/ebt_stp.h | 24 ++++++++++++------------
include/linux/netfilter_bridge/ebt_ulog.h | 2 +-
include/linux/netfilter_bridge/ebt_vlan.h | 8 ++++----
include/linux/netfilter_ipv4/ipt_CLUSTERIP.h | 14 +++++++-------
include/linux/netfilter_ipv4/ipt_ECN.h | 6 +++---
include/linux/netfilter_ipv4/ipt_SAME.h | 6 +++---
include/linux/netfilter_ipv4/ipt_TTL.h | 4 ++--
include/linux/netfilter_ipv4/ipt_addrtype.h | 14 +++++++-------
include/linux/netfilter_ipv4/ipt_ah.h | 4 ++--
include/linux/netfilter_ipv4/ipt_ecn.h | 8 ++++----
include/linux/netfilter_ipv4/ipt_ttl.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_HL.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_REJECT.h | 2 +-
include/linux/netfilter_ipv6/ip6t_ah.h | 8 ++++----
include/linux/netfilter_ipv6/ip6t_frag.h | 8 ++++----
include/linux/netfilter_ipv6/ip6t_hl.h | 4 ++--
| 6 +++---
include/linux/netfilter_ipv6/ip6t_mh.h | 4 ++--
include/linux/netfilter_ipv6/ip6t_opts.h | 10 +++++-----
include/linux/netfilter_ipv6/ip6t_rt.h | 12 ++++++------
30 files changed, 121 insertions(+), 121 deletions(-)
diff --git a/include/linux/netfilter_bridge/ebt_802_3.h b/include/linux/netfilter_bridge/ebt_802_3.h
index c73ef0b..c427764 100644
--- a/include/linux/netfilter_bridge/ebt_802_3.h
+++ b/include/linux/netfilter_bridge/ebt_802_3.h
@@ -24,24 +24,24 @@
/* ui has one byte ctrl, ni has two */
struct hdr_ui {
- uint8_t dsap;
- uint8_t ssap;
- uint8_t ctrl;
- uint8_t orig[3];
+ __u8 dsap;
+ __u8 ssap;
+ __u8 ctrl;
+ __u8 orig[3];
__be16 type;
};
struct hdr_ni {
- uint8_t dsap;
- uint8_t ssap;
+ __u8 dsap;
+ __u8 ssap;
__be16 ctrl;
- uint8_t orig[3];
+ __u8 orig[3];
__be16 type;
};
struct ebt_802_3_hdr {
- uint8_t daddr[6];
- uint8_t saddr[6];
+ __u8 daddr[6];
+ __u8 saddr[6];
__be16 len;
union {
struct hdr_ui ui;
@@ -59,10 +59,10 @@ static inline struct ebt_802_3_hdr *ebt_802_3_hdr(const struct sk_buff *skb)
#endif
struct ebt_802_3_info {
- uint8_t sap;
+ __u8 sap;
__be16 type;
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_among.h b/include/linux/netfilter_bridge/ebt_among.h
index 0009558..686c961 100644
--- a/include/linux/netfilter_bridge/ebt_among.h
+++ b/include/linux/netfilter_bridge/ebt_among.h
@@ -30,7 +30,7 @@
*/
struct ebt_mac_wormhash_tuple {
- uint32_t cmp[2];
+ __u32 cmp[2];
__be32 ip;
};
diff --git a/include/linux/netfilter_bridge/ebt_arp.h b/include/linux/netfilter_bridge/ebt_arp.h
index cbf4843..e62b5af 100644
--- a/include/linux/netfilter_bridge/ebt_arp.h
+++ b/include/linux/netfilter_bridge/ebt_arp.h
@@ -27,8 +27,8 @@ struct ebt_arp_info
unsigned char smmsk[ETH_ALEN];
unsigned char dmaddr[ETH_ALEN];
unsigned char dmmsk[ETH_ALEN];
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 bitmask;
+ __u8 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip.h b/include/linux/netfilter_bridge/ebt_ip.h
index 6a708fb..d99de58 100644
--- a/include/linux/netfilter_bridge/ebt_ip.h
+++ b/include/linux/netfilter_bridge/ebt_ip.h
@@ -31,12 +31,12 @@ struct ebt_ip_info {
__be32 daddr;
__be32 smsk;
__be32 dmsk;
- uint8_t tos;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
- uint16_t sport[2];
- uint16_t dport[2];
+ __u8 tos;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
+ __u16 sport[2];
+ __u16 dport[2];
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ip6.h b/include/linux/netfilter_bridge/ebt_ip6.h
index 22af18a..998e9d5 100644
--- a/include/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/linux/netfilter_bridge/ebt_ip6.h
@@ -31,17 +31,17 @@ struct ebt_ip6_info {
struct in6_addr daddr;
struct in6_addr smsk;
struct in6_addr dmsk;
- uint8_t tclass;
- uint8_t protocol;
- uint8_t bitmask;
- uint8_t invflags;
+ __u8 tclass;
+ __u8 protocol;
+ __u8 bitmask;
+ __u8 invflags;
union {
- uint16_t sport[2];
- uint8_t icmpv6_type[2];
+ __u16 sport[2];
+ __u8 icmpv6_type[2];
};
union {
- uint16_t dport[2];
- uint8_t icmpv6_code[2];
+ __u16 dport[2];
+ __u8 icmpv6_code[2];
};
};
diff --git a/include/linux/netfilter_bridge/ebt_limit.h b/include/linux/netfilter_bridge/ebt_limit.h
index 4bf76b7..721d51f 100644
--- a/include/linux/netfilter_bridge/ebt_limit.h
+++ b/include/linux/netfilter_bridge/ebt_limit.h
@@ -10,13 +10,13 @@
seconds, or one every 59 hours. */
struct ebt_limit_info {
- u_int32_t avg; /* Average secs between packets * scale */
- u_int32_t burst; /* Period multiplier for upper limit. */
+ __u32 avg; /* Average secs between packets * scale */
+ __u32 burst; /* Period multiplier for upper limit. */
/* Used internally by the kernel */
unsigned long prev;
- u_int32_t credit;
- u_int32_t credit_cap, cost;
+ __u32 credit;
+ __u32 credit_cap, cost;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_log.h b/include/linux/netfilter_bridge/ebt_log.h
index cc2cdfb..564beb4 100644
--- a/include/linux/netfilter_bridge/ebt_log.h
+++ b/include/linux/netfilter_bridge/ebt_log.h
@@ -10,9 +10,9 @@
#define EBT_LOG_WATCHER "log"
struct ebt_log_info {
- uint8_t loglevel;
- uint8_t prefix[EBT_LOG_PREFIX_SIZE];
- uint32_t bitmask;
+ __u8 loglevel;
+ __u8 prefix[EBT_LOG_PREFIX_SIZE];
+ __u32 bitmask;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_mark_m.h b/include/linux/netfilter_bridge/ebt_mark_m.h
index 9ceb10e..97b96c4 100644
--- a/include/linux/netfilter_bridge/ebt_mark_m.h
+++ b/include/linux/netfilter_bridge/ebt_mark_m.h
@@ -6,8 +6,8 @@
#define EBT_MARK_MASK (EBT_MARK_AND | EBT_MARK_OR)
struct ebt_mark_m_info {
unsigned long mark, mask;
- uint8_t invert;
- uint8_t bitmask;
+ __u8 invert;
+ __u8 bitmask;
};
#define EBT_MARK_MATCH "mark_m"
diff --git a/include/linux/netfilter_bridge/ebt_nflog.h b/include/linux/netfilter_bridge/ebt_nflog.h
index 0528178..477315b 100644
--- a/include/linux/netfilter_bridge/ebt_nflog.h
+++ b/include/linux/netfilter_bridge/ebt_nflog.h
@@ -10,11 +10,11 @@
#define EBT_NFLOG_DEFAULT_THRESHOLD 1
struct ebt_nflog_info {
- u_int32_t len;
- u_int16_t group;
- u_int16_t threshold;
- u_int16_t flags;
- u_int16_t pad;
+ __u32 len;
+ __u16 group;
+ __u16 threshold;
+ __u16 flags;
+ __u16 pad;
char prefix[EBT_NFLOG_PREFIX_SIZE];
};
diff --git a/include/linux/netfilter_bridge/ebt_pkttype.h b/include/linux/netfilter_bridge/ebt_pkttype.h
index 51a7998..7c0fb0f 100644
--- a/include/linux/netfilter_bridge/ebt_pkttype.h
+++ b/include/linux/netfilter_bridge/ebt_pkttype.h
@@ -2,8 +2,8 @@
#define __LINUX_BRIDGE_EBT_PKTTYPE_H
struct ebt_pkttype_info {
- uint8_t pkt_type;
- uint8_t invert;
+ __u8 pkt_type;
+ __u8 invert;
};
#define EBT_PKTTYPE_MATCH "pkttype"
diff --git a/include/linux/netfilter_bridge/ebt_stp.h b/include/linux/netfilter_bridge/ebt_stp.h
index e503a0a..13a0bd4 100644
--- a/include/linux/netfilter_bridge/ebt_stp.h
+++ b/include/linux/netfilter_bridge/ebt_stp.h
@@ -21,24 +21,24 @@
#define EBT_STP_MATCH "stp"
struct ebt_stp_config_info {
- uint8_t flags;
- uint16_t root_priol, root_priou;
+ __u8 flags;
+ __u16 root_priol, root_priou;
char root_addr[6], root_addrmsk[6];
- uint32_t root_costl, root_costu;
- uint16_t sender_priol, sender_priou;
+ __u32 root_costl, root_costu;
+ __u16 sender_priol, sender_priou;
char sender_addr[6], sender_addrmsk[6];
- uint16_t portl, portu;
- uint16_t msg_agel, msg_ageu;
- uint16_t max_agel, max_ageu;
- uint16_t hello_timel, hello_timeu;
- uint16_t forward_delayl, forward_delayu;
+ __u16 portl, portu;
+ __u16 msg_agel, msg_ageu;
+ __u16 max_agel, max_ageu;
+ __u16 hello_timel, hello_timeu;
+ __u16 forward_delayl, forward_delayu;
};
struct ebt_stp_info {
- uint8_t type;
+ __u8 type;
struct ebt_stp_config_info config;
- uint16_t bitmask;
- uint16_t invflags;
+ __u16 bitmask;
+ __u16 invflags;
};
#endif
diff --git a/include/linux/netfilter_bridge/ebt_ulog.h b/include/linux/netfilter_bridge/ebt_ulog.h
index b677e26..de35a51 100644
--- a/include/linux/netfilter_bridge/ebt_ulog.h
+++ b/include/linux/netfilter_bridge/ebt_ulog.h
@@ -10,7 +10,7 @@
#define EBT_ULOG_VERSION 1
struct ebt_ulog_info {
- uint32_t nlgroup;
+ __u32 nlgroup;
unsigned int cprange;
unsigned int qthreshold;
char prefix[EBT_ULOG_PREFIX_LEN];
diff --git a/include/linux/netfilter_bridge/ebt_vlan.h b/include/linux/netfilter_bridge/ebt_vlan.h
index 1d98be4..48dffc1 100644
--- a/include/linux/netfilter_bridge/ebt_vlan.h
+++ b/include/linux/netfilter_bridge/ebt_vlan.h
@@ -8,12 +8,12 @@
#define EBT_VLAN_MATCH "vlan"
struct ebt_vlan_info {
- uint16_t id; /* VLAN ID {1-4095} */
- uint8_t prio; /* VLAN User Priority {0-7} */
+ __u16 id; /* VLAN ID {1-4095} */
+ __u8 prio; /* VLAN User Priority {0-7} */
__be16 encap; /* VLAN Encapsulated frame code {0-65535} */
- uint8_t bitmask; /* Args bitmask bit 1=1 - ID arg,
+ __u8 bitmask; /* Args bitmask bit 1=1 - ID arg,
bit 2=1 User-Priority arg, bit 3=1 encap*/
- uint8_t invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
+ __u8 invflags; /* Inverse bitmask bit 1=1 - inversed ID arg,
bit 2=1 - inversed Pirority arg */
};
diff --git a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
index e5a3687..3114f06 100644
--- a/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
+++ b/include/linux/netfilter_ipv4/ipt_CLUSTERIP.h
@@ -17,15 +17,15 @@ struct clusterip_config;
struct ipt_clusterip_tgt_info {
- u_int32_t flags;
+ __u32 flags;
/* only relevant for new ones */
- u_int8_t clustermac[6];
- u_int16_t num_total_nodes;
- u_int16_t num_local_nodes;
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
- u_int32_t hash_mode;
- u_int32_t hash_initval;
+ __u8 clustermac[6];
+ __u16 num_total_nodes;
+ __u16 num_local_nodes;
+ __u16 local_nodes[CLUSTERIP_MAX_NODES];
+ __u32 hash_mode;
+ __u32 hash_initval;
/* Used internally by the kernel */
struct clusterip_config *config;
diff --git a/include/linux/netfilter_ipv4/ipt_ECN.h b/include/linux/netfilter_ipv4/ipt_ECN.h
index 7ca4591..c6e3e01 100644
--- a/include/linux/netfilter_ipv4/ipt_ECN.h
+++ b/include/linux/netfilter_ipv4/ipt_ECN.h
@@ -19,11 +19,11 @@
#define IPT_ECN_OP_MASK 0xce
struct ipt_ECN_info {
- u_int8_t operation; /* bitset of operations */
- u_int8_t ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
+ __u8 operation; /* bitset of operations */
+ __u8 ip_ect; /* ECT codepoint of IPv4 header, pre-shifted */
union {
struct {
- u_int8_t ece:1, cwr:1; /* TCP ECT bits */
+ __u8 ece:1, cwr:1; /* TCP ECT bits */
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_SAME.h b/include/linux/netfilter_ipv4/ipt_SAME.h
index 2529660..fa0ebec 100644
--- a/include/linux/netfilter_ipv4/ipt_SAME.h
+++ b/include/linux/netfilter_ipv4/ipt_SAME.h
@@ -7,9 +7,9 @@
struct ipt_same_info {
unsigned char info;
- u_int32_t rangesize;
- u_int32_t ipnum;
- u_int32_t *iparray;
+ __u32 rangesize;
+ __u32 ipnum;
+ __u32 *iparray;
/* hangs off end. */
struct nf_nat_range range[IPT_SAME_MAX_RANGE];
diff --git a/include/linux/netfilter_ipv4/ipt_TTL.h b/include/linux/netfilter_ipv4/ipt_TTL.h
index ee6611e..f6250e4 100644
--- a/include/linux/netfilter_ipv4/ipt_TTL.h
+++ b/include/linux/netfilter_ipv4/ipt_TTL.h
@@ -13,8 +13,8 @@ enum {
#define IPT_TTL_MAXMODE IPT_TTL_DEC
struct ipt_TTL_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv4/ipt_addrtype.h b/include/linux/netfilter_ipv4/ipt_addrtype.h
index 446de6a..f29c3cf 100644
--- a/include/linux/netfilter_ipv4/ipt_addrtype.h
+++ b/include/linux/netfilter_ipv4/ipt_addrtype.h
@@ -9,17 +9,17 @@ enum {
};
struct ipt_addrtype_info_v1 {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t flags;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 flags;
};
/* revision 0 */
struct ipt_addrtype_info {
- u_int16_t source; /* source-type mask */
- u_int16_t dest; /* dest-type mask */
- u_int32_t invert_source;
- u_int32_t invert_dest;
+ __u16 source; /* source-type mask */
+ __u16 dest; /* dest-type mask */
+ __u32 invert_source;
+ __u32 invert_dest;
};
#endif
diff --git a/include/linux/netfilter_ipv4/ipt_ah.h b/include/linux/netfilter_ipv4/ipt_ah.h
index 2e555b4..8fea283 100644
--- a/include/linux/netfilter_ipv4/ipt_ah.h
+++ b/include/linux/netfilter_ipv4/ipt_ah.h
@@ -2,8 +2,8 @@
#define _IPT_AH_H
struct ipt_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u8 invflags; /* Inverse flags */
};
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index 9945baa..78b98aa 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -20,12 +20,12 @@
/* match info */
struct ipt_ecn_info {
- u_int8_t operation;
- u_int8_t invert;
- u_int8_t ip_ect;
+ __u8 operation;
+ __u8 invert;
+ __u8 ip_ect;
union {
struct {
- u_int8_t ect;
+ __u8 ect;
} tcp;
} proto;
};
diff --git a/include/linux/netfilter_ipv4/ipt_ttl.h b/include/linux/netfilter_ipv4/ipt_ttl.h
index ee24fd8..93d9a06 100644
--- a/include/linux/netfilter_ipv4/ipt_ttl.h
+++ b/include/linux/netfilter_ipv4/ipt_ttl.h
@@ -13,8 +13,8 @@ enum {
struct ipt_ttl_info {
- u_int8_t mode;
- u_int8_t ttl;
+ __u8 mode;
+ __u8 ttl;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_HL.h b/include/linux/netfilter_ipv6/ip6t_HL.h
index afb7813..81cdaf0 100644
--- a/include/linux/netfilter_ipv6/ip6t_HL.h
+++ b/include/linux/netfilter_ipv6/ip6t_HL.h
@@ -14,8 +14,8 @@ enum {
#define IP6T_HL_MAXMODE IP6T_HL_DEC
struct ip6t_HL_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
diff --git a/include/linux/netfilter_ipv6/ip6t_REJECT.h b/include/linux/netfilter_ipv6/ip6t_REJECT.h
index 6be6504..b999aa4 100644
--- a/include/linux/netfilter_ipv6/ip6t_REJECT.h
+++ b/include/linux/netfilter_ipv6/ip6t_REJECT.h
@@ -12,7 +12,7 @@ enum ip6t_reject_with {
};
struct ip6t_reject_info {
- u_int32_t with; /* reject type */
+ __u32 with; /* reject type */
};
#endif /*_IP6T_REJECT_H*/
diff --git a/include/linux/netfilter_ipv6/ip6t_ah.h b/include/linux/netfilter_ipv6/ip6t_ah.h
index 17a745c..a602c16 100644
--- a/include/linux/netfilter_ipv6/ip6t_ah.h
+++ b/include/linux/netfilter_ipv6/ip6t_ah.h
@@ -2,10 +2,10 @@
#define _IP6T_AH_H
struct ip6t_ah {
- u_int32_t spis[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t hdrres; /* Test of the Reserved Filed */
- u_int8_t invflags; /* Inverse flags */
+ __u32 spis[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 hdrres; /* Test of the Reserved Filed */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_AH_SPI 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_frag.h b/include/linux/netfilter_ipv6/ip6t_frag.h
index 3724d08..538b31e 100644
--- a/include/linux/netfilter_ipv6/ip6t_frag.h
+++ b/include/linux/netfilter_ipv6/ip6t_frag.h
@@ -2,10 +2,10 @@
#define _IP6T_FRAG_H
struct ip6t_frag {
- u_int32_t ids[2]; /* Security Parameter Index */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 ids[2]; /* Security Parameter Index */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
};
#define IP6T_FRAG_IDS 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_hl.h b/include/linux/netfilter_ipv6/ip6t_hl.h
index 5ef91b8..c6fddcb 100644
--- a/include/linux/netfilter_ipv6/ip6t_hl.h
+++ b/include/linux/netfilter_ipv6/ip6t_hl.h
@@ -14,8 +14,8 @@ enum {
struct ip6t_hl_info {
- u_int8_t mode;
- u_int8_t hop_limit;
+ __u8 mode;
+ __u8 hop_limit;
};
--git a/include/linux/netfilter_ipv6/ip6t_ipv6header.h b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
index 01dfd44..73d53bd 100644
--- a/include/linux/netfilter_ipv6/ip6t_ipv6header.h
+++ b/include/linux/netfilter_ipv6/ip6t_ipv6header.h
@@ -9,9 +9,9 @@ on whether they contain certain headers */
#define __IPV6HEADER_H
struct ip6t_ipv6header_info {
- u_int8_t matchflags;
- u_int8_t invflags;
- u_int8_t modeflag;
+ __u8 matchflags;
+ __u8 invflags;
+ __u8 modeflag;
};
#define MASK_HOPOPTS 128
diff --git a/include/linux/netfilter_ipv6/ip6t_mh.h b/include/linux/netfilter_ipv6/ip6t_mh.h
index 18549bc..98c8cf6 100644
--- a/include/linux/netfilter_ipv6/ip6t_mh.h
+++ b/include/linux/netfilter_ipv6/ip6t_mh.h
@@ -3,8 +3,8 @@
/* MH matching stuff */
struct ip6t_mh {
- u_int8_t types[2]; /* MH type range */
- u_int8_t invflags; /* Inverse flags */
+ __u8 types[2]; /* MH type range */
+ __u8 invflags; /* Inverse flags */
};
/* Values for "invflags" field in struct ip6t_mh. */
diff --git a/include/linux/netfilter_ipv6/ip6t_opts.h b/include/linux/netfilter_ipv6/ip6t_opts.h
index 62d89bc..405d309 100644
--- a/include/linux/netfilter_ipv6/ip6t_opts.h
+++ b/include/linux/netfilter_ipv6/ip6t_opts.h
@@ -4,11 +4,11 @@
#define IP6T_OPTS_OPTSNR 16
struct ip6t_opts {
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
- u_int16_t opts[IP6T_OPTS_OPTSNR]; /* opts */
- u_int8_t optsnr; /* Nr of OPts */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
+ __u16 opts[IP6T_OPTS_OPTSNR]; /* opts */
+ __u8 optsnr; /* Nr of OPts */
};
#define IP6T_OPTS_LEN 0x01
diff --git a/include/linux/netfilter_ipv6/ip6t_rt.h b/include/linux/netfilter_ipv6/ip6t_rt.h
index ab91bfd..e8dad20 100644
--- a/include/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/linux/netfilter_ipv6/ip6t_rt.h
@@ -6,13 +6,13 @@
#define IP6T_RT_HOPS 16
struct ip6t_rt {
- u_int32_t rt_type; /* Routing Type */
- u_int32_t segsleft[2]; /* Segments Left */
- u_int32_t hdrlen; /* Header Length */
- u_int8_t flags; /* */
- u_int8_t invflags; /* Inverse flags */
+ __u32 rt_type; /* Routing Type */
+ __u32 segsleft[2]; /* Segments Left */
+ __u32 hdrlen; /* Header Length */
+ __u8 flags; /* */
+ __u8 invflags; /* Inverse flags */
struct in6_addr addrs[IP6T_RT_HOPS]; /* Hops */
- u_int8_t addrnr; /* Nr of Addresses */
+ __u8 addrnr; /* Nr of Addresses */
};
#define IP6T_RT_TYP 0x01
--
1.7.2.3
^ permalink raw reply related
* [PATCH 64/79] netfilter: xtables: add missing aliases for autoloading via iptables
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
net/netfilter/xt_IDLETIMER.c | 2 ++
net/netfilter/xt_LED.c | 2 ++
net/netfilter/xt_cpu.c | 2 ++
3 files changed, 6 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c
index be1f22e..3bdd443 100644
--- a/net/netfilter/xt_IDLETIMER.c
+++ b/net/netfilter/xt_IDLETIMER.c
@@ -313,3 +313,5 @@ MODULE_AUTHOR("Timo Teras <ext-timo.teras@nokia.com>");
MODULE_AUTHOR("Luciano Coelho <luciano.coelho@nokia.com>");
MODULE_DESCRIPTION("Xtables: idle time monitor");
MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("ipt_IDLETIMER");
+MODULE_ALIAS("ip6t_IDLETIMER");
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index a414050..993de2b 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -31,6 +31,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>");
MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match");
+MODULE_ALIAS("ipt_LED");
+MODULE_ALIAS("ip6t_LED");
static LIST_HEAD(xt_led_triggers);
static DEFINE_MUTEX(xt_led_mutex);
diff --git a/net/netfilter/xt_cpu.c b/net/netfilter/xt_cpu.c
index b39db8a..c7a2e54 100644
--- a/net/netfilter/xt_cpu.c
+++ b/net/netfilter/xt_cpu.c
@@ -22,6 +22,8 @@
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Eric Dumazet <eric.dumazet@gmail.com>");
MODULE_DESCRIPTION("Xtables: CPU match");
+MODULE_ALIAS("ipt_cpu");
+MODULE_ALIAS("ip6t_cpu");
static int cpu_mt_check(const struct xt_mtchk_param *par)
{
--
1.7.2.3
^ permalink raw reply related
* [PATCH 61/79] netfilter: nf_conntrack: use is_vmalloc_addr()
From: kaber @ 2011-01-19 19:15 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Patrick McHardy <kaber@trash.net>
Use is_vmalloc_addr() in nf_ct_free_hashtable() and get rid of
the vmalloc flags to indicate that a hash table has been allocated
using vmalloc().
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/net/netfilter/nf_conntrack.h | 4 ++--
include/net/netns/conntrack.h | 2 --
include/net/netns/ipv4.h | 1 -
net/ipv4/netfilter/nf_nat_core.c | 6 ++----
net/netfilter/nf_conntrack_core.c | 26 +++++++++-----------------
net/netfilter/nf_conntrack_expect.c | 9 +++------
net/netfilter/nf_conntrack_helper.c | 10 +++-------
7 files changed, 19 insertions(+), 39 deletions(-)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 2bc344c..d0d1337 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -202,9 +202,9 @@ extern void nf_ct_l3proto_module_put(unsigned short l3proto);
* Allocate a hashtable of hlist_head (if nulls == 0),
* or hlist_nulls_head (if nulls == 1)
*/
-extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls);
+extern void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls);
-extern void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size);
+extern void nf_ct_free_hashtable(void *hash, unsigned int size);
extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, u16 zone,
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index d4958d4..5cf8a8c 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -28,8 +28,6 @@ struct netns_ct {
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
- int hash_vmalloc;
- int expect_vmalloc;
char *slabname;
};
#endif
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index d68c3f1..e2e2ef5 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,7 +43,6 @@ struct netns_ipv4 {
struct xt_table *nat_table;
struct hlist_head *nat_bysource;
unsigned int nat_htable_size;
- int nat_vmalloced;
#endif
int sysctl_icmp_echo_ignore_all;
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index eb55835..6972cee 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -682,8 +682,7 @@ static int __net_init nf_nat_net_init(struct net *net)
{
/* Leave them the same for the moment. */
net->ipv4.nat_htable_size = net->ct.htable_size;
- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size,
- &net->ipv4.nat_vmalloced, 0);
+ net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, 0);
if (!net->ipv4.nat_bysource)
return -ENOMEM;
return 0;
@@ -705,8 +704,7 @@ static void __net_exit nf_nat_net_exit(struct net *net)
{
nf_ct_iterate_cleanup(net, &clean_nat, NULL);
synchronize_rcu();
- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
- net->ipv4.nat_htable_size);
+ nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_htable_size);
}
static struct pernet_operations nf_nat_net_ops = {
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e95ac42..dc2ff2c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1202,9 +1202,9 @@ static int kill_all(struct nf_conn *i, void *data)
return 1;
}
-void nf_ct_free_hashtable(void *hash, int vmalloced, unsigned int size)
+void nf_ct_free_hashtable(void *hash, unsigned int size)
{
- if (vmalloced)
+ if (is_vmalloc_addr(hash))
vfree(hash);
else
free_pages((unsigned long)hash,
@@ -1271,8 +1271,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
goto i_see_dead_people;
}
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
@@ -1301,21 +1300,18 @@ void nf_conntrack_cleanup(struct net *net)
}
}
-void *nf_ct_alloc_hashtable(unsigned int *sizep, int *vmalloced, int nulls)
+void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)
{
struct hlist_nulls_head *hash;
unsigned int nr_slots, i;
size_t sz;
- *vmalloced = 0;
-
BUILD_BUG_ON(sizeof(struct hlist_nulls_head) != sizeof(struct hlist_head));
nr_slots = *sizep = roundup(*sizep, PAGE_SIZE / sizeof(struct hlist_nulls_head));
sz = nr_slots * sizeof(struct hlist_nulls_head);
hash = (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO,
get_order(sz));
if (!hash) {
- *vmalloced = 1;
printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n");
hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
PAGE_KERNEL);
@@ -1331,7 +1327,7 @@ EXPORT_SYMBOL_GPL(nf_ct_alloc_hashtable);
int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
{
- int i, bucket, vmalloced, old_vmalloced;
+ int i, bucket;
unsigned int hashsize, old_size;
struct hlist_nulls_head *hash, *old_hash;
struct nf_conntrack_tuple_hash *h;
@@ -1348,7 +1344,7 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
if (!hashsize)
return -EINVAL;
- hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
+ hash = nf_ct_alloc_hashtable(&hashsize, 1);
if (!hash)
return -ENOMEM;
@@ -1370,15 +1366,13 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp)
}
}
old_size = init_net.ct.htable_size;
- old_vmalloced = init_net.ct.hash_vmalloc;
old_hash = init_net.ct.hash;
init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
- init_net.ct.hash_vmalloc = vmalloced;
init_net.ct.hash = hash;
spin_unlock_bh(&nf_conntrack_lock);
- nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
+ nf_ct_free_hashtable(old_hash, old_size);
return 0;
}
EXPORT_SYMBOL_GPL(nf_conntrack_set_hashsize);
@@ -1491,8 +1485,7 @@ static int nf_conntrack_init_net(struct net *net)
}
net->ct.htable_size = nf_conntrack_htable_size;
- net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size,
- &net->ct.hash_vmalloc, 1);
+ net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, 1);
if (!net->ct.hash) {
ret = -ENOMEM;
printk(KERN_ERR "Unable to create nf_conntrack_hash\n");
@@ -1515,8 +1508,7 @@ err_ecache:
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
- nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
- net->ct.htable_size);
+ nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
err_hash:
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
err_cache:
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 4a9ed23..cd1e8e0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -639,8 +639,7 @@ int nf_conntrack_expect_init(struct net *net)
}
net->ct.expect_count = 0;
- net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
- &net->ct.expect_vmalloc, 0);
+ net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
if (net->ct.expect_hash == NULL)
goto err1;
@@ -662,8 +661,7 @@ err3:
if (net_eq(net, &init_net))
kmem_cache_destroy(nf_ct_expect_cachep);
err2:
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
err1:
return err;
}
@@ -675,6 +673,5 @@ void nf_conntrack_expect_fini(struct net *net)
rcu_barrier(); /* Wait for call_rcu() before destroy */
kmem_cache_destroy(nf_ct_expect_cachep);
}
- nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
- nf_ct_expect_hsize);
+ nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 767bbe9..1bdfea3 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -33,7 +33,6 @@ static DEFINE_MUTEX(nf_ct_helper_mutex);
static struct hlist_head *nf_ct_helper_hash __read_mostly;
static unsigned int nf_ct_helper_hsize __read_mostly;
static unsigned int nf_ct_helper_count __read_mostly;
-static int nf_ct_helper_vmalloc;
/* Stupid hash, but collision free for the default registrations of the
@@ -267,8 +266,7 @@ int nf_conntrack_helper_init(void)
int err;
nf_ct_helper_hsize = 1; /* gets rounded up to use one page */
- nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize,
- &nf_ct_helper_vmalloc, 0);
+ nf_ct_helper_hash = nf_ct_alloc_hashtable(&nf_ct_helper_hsize, 0);
if (!nf_ct_helper_hash)
return -ENOMEM;
@@ -279,14 +277,12 @@ int nf_conntrack_helper_init(void)
return 0;
err1:
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
return err;
}
void nf_conntrack_helper_fini(void)
{
nf_ct_extend_unregister(&helper_extend);
- nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_vmalloc,
- nf_ct_helper_hsize);
+ nf_ct_free_hashtable(nf_ct_helper_hash, nf_ct_helper_hsize);
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 54/79] IPVS: netns, misc init_net removal in core.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
init_net removed in __ip_vs_addr_is_local_v6, and got net as param.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 6 ++++--
net/netfilter/ipvs/ip_vs_ctl.c | 9 +++++----
2 files changed, 9 insertions(+), 6 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index bdda346..9e10c7a 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
+ struct net *net;
struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
@@ -511,18 +512,19 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
+ net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
if (svc->af == AF_INET6)
unicast = ipv6_addr_type(&iph.daddr.in6) & IPV6_ADDR_UNICAST;
else
#endif
- unicast = (inet_addr_type(&init_net, iph.daddr.ip) == RTN_UNICAST);
+ unicast = (inet_addr_type(net, iph.daddr.ip) == RTN_UNICAST);
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- ipvs = net_ipvs(skb_net(skb));
+ ipvs = net_ipvs(net);
if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 65f5de4..edf2b6d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -71,7 +71,8 @@ int ip_vs_get_debug_level(void)
#ifdef CONFIG_IP_VS_IPV6
/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
-static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
+static int __ip_vs_addr_is_local_v6(struct net *net,
+ const struct in6_addr *addr)
{
struct rt6_info *rt;
struct flowi fl = {
@@ -80,7 +81,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
.fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
};
- rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
+ rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl);
if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
return 1;
@@ -810,12 +811,12 @@ ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
atype = ipv6_addr_type(&udest->addr.in6);
if ((!(atype & IPV6_ADDR_UNICAST) ||
atype & IPV6_ADDR_LINKLOCAL) &&
- !__ip_vs_addr_is_local_v6(&udest->addr.in6))
+ !__ip_vs_addr_is_local_v6(svc->net, &udest->addr.in6))
return -EINVAL;
} else
#endif
{
- atype = inet_addr_type(&init_net, udest->addr.ip);
+ atype = inet_addr_type(svc->net, udest->addr.ip);
if (atype != RTN_LOCAL && atype != RTN_UNICAST)
return -EINVAL;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 51/79] IPVS: netns, defense work timer.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
This patch makes defense work timer per name-space,
A net ptr had to be added to the ipvs struct,
since it's needed by defense_work_handler.
[ horms@verge.net.au: Use cancel_delayed_work_sync() instead of
cancel_rearming_delayed_work(). Found during
merge conflict resoliution ]
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +-
include/net/netns/ip_vs.h | 3 +++
net/netfilter/ipvs/ip_vs_conn.c | 5 +++--
net/netfilter/ipvs/ip_vs_core.c | 1 +
net/netfilter/ipvs/ip_vs_ctl.c | 20 +++++++++-----------
5 files changed, 17 insertions(+), 14 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index af9acf4..fbe660f 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -877,7 +877,7 @@ extern const char * ip_vs_state_name(__u16 proto, int state);
extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
-extern void ip_vs_random_dropentry(void);
+extern void ip_vs_random_dropentry(struct net *net);
extern int ip_vs_conn_init(void);
extern void ip_vs_conn_cleanup(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index c4b1abf..4133261 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -71,6 +71,7 @@ struct netns_ipvs {
int num_services; /* no of virtual services */
/* 1/rate drop and drop-entry variables */
+ struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
@@ -129,6 +130,8 @@ struct netns_ipvs {
/* multicast interface name */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ /* net name space ptr */
+ struct net *net; /* Needed by timer routines */
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 5ba205a..28bdaf7 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -1138,7 +1138,7 @@ static inline int todrop_entry(struct ip_vs_conn *cp)
}
/* Called from keventd and must protect itself from softirqs */
-void ip_vs_random_dropentry(void)
+void ip_vs_random_dropentry(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
@@ -1158,7 +1158,8 @@ void ip_vs_random_dropentry(void)
if (cp->flags & IP_VS_CONN_F_TEMPLATE)
/* connection template */
continue;
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
if (cp->protocol == IPPROTO_TCP) {
switch(cp->state) {
case IP_VS_TCP_S_SYN_RECV:
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index a7c59a7..bdda346 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1884,6 +1884,7 @@ static int __net_init __ip_vs_init(struct net *net)
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
+ ipvs->net = net;
/* Counters used for creating unique names */
ipvs->gen = atomic_read(&ipvs_netns_cnt);
atomic_inc(&ipvs_netns_cnt);
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 183ac18..6a963d4 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -217,18 +217,16 @@ static void update_defense_level(struct netns_ipvs *ipvs)
* Timer for checking the defense
*/
#define DEFENSE_TIMER_PERIOD 1*HZ
-static void defense_work_handler(struct work_struct *work);
-static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
static void defense_work_handler(struct work_struct *work)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs =
+ container_of(work, struct netns_ipvs, defense_work.work);
update_defense_level(ipvs);
if (atomic_read(&ipvs->dropentry))
- ip_vs_random_dropentry();
-
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
+ ip_vs_random_dropentry(ipvs->net);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
int
@@ -3564,6 +3562,9 @@ int __net_init __ip_vs_control_init(struct net *net)
goto err_reg;
ip_vs_new_estimator(net, ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
+ /* Schedule defense work */
+ INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
+ schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
return 0;
err_reg:
@@ -3588,6 +3589,8 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
+ cancel_delayed_work_sync(&ipvs->defense_work);
+ cancel_work_sync(&ipvs->defense_work.work);
free_percpu(ipvs->cpustats);
kfree(ipvs->tot_stats);
}
@@ -3631,9 +3634,6 @@ int __init ip_vs_control_init(void)
goto err_net;
}
- /* Hook the defense timer */
- schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
-
LeaveFunction(2);
return 0;
@@ -3648,8 +3648,6 @@ void ip_vs_control_cleanup(void)
{
EnterFunction(2);
ip_vs_trash_cleanup();
- cancel_delayed_work_sync(&defense_work);
- cancel_work_sync(&defense_work.work);
unregister_pernet_subsys(&ipvs_control_ops);
ip_vs_genl_unregister();
nf_unregister_sockopt(&ip_vs_sockopts);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 50/79] IPVS: netns, ip_vs_ctl local vars moved to ipvs struct.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Moving global vars to ipvs struct, except for svc table lock.
Next patch for ctl will be drop-rate handling.
*v3
__ip_vs_mutex remains global
ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 27 ++--
include/net/netns/ip_vs.h | 37 ++++-
net/netfilter/ipvs/ip_vs_conn.c | 7 +-
net/netfilter/ipvs/ip_vs_core.c | 34 +++--
net/netfilter/ipvs/ip_vs_ctl.c | 291 +++++++++++++++++----------------
net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +-
net/netfilter/ipvs/ip_vs_sync.c | 9 +-
9 files changed, 230 insertions(+), 181 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index f82c0ff..af9acf4 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -41,7 +41,7 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
* Get net ptr from skb in traffic cases
* use skb_sknet when call is from userland (ioctl or netlink)
*/
-static inline struct net *skb_net(struct sk_buff *skb)
+static inline struct net *skb_net(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
@@ -69,7 +69,7 @@ static inline struct net *skb_net(struct sk_buff *skb)
#endif
}
-static inline struct net *skb_sknet(struct sk_buff *skb)
+static inline struct net *skb_sknet(const struct sk_buff *skb)
{
#ifdef CONFIG_NET_NS
#ifdef CONFIG_IP_VS_DEBUG
@@ -1023,13 +1023,6 @@ extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/*
* IPVS control data and functions (from ip_vs_ctl.c)
*/
-extern int sysctl_ip_vs_cache_bypass;
-extern int sysctl_ip_vs_expire_nodest_conn;
-extern int sysctl_ip_vs_expire_quiescent_template;
-extern int sysctl_ip_vs_sync_threshold[2];
-extern int sysctl_ip_vs_nat_icmp_send;
-extern int sysctl_ip_vs_conntrack;
-extern int sysctl_ip_vs_snat_reroute;
extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
@@ -1119,11 +1112,13 @@ extern int ip_vs_icmp_xmit_v6
extern int ip_vs_drop_rate;
extern int ip_vs_drop_counter;
-static __inline__ int ip_vs_todrop(void)
+static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
{
- if (!ip_vs_drop_rate) return 0;
- if (--ip_vs_drop_counter > 0) return 0;
- ip_vs_drop_counter = ip_vs_drop_rate;
+ if (!ipvs->drop_rate)
+ return 0;
+ if (--ipvs->drop_counter > 0)
+ return 0;
+ ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
@@ -1211,9 +1206,9 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
* Netfilter connection tracking
* (from ip_vs_nfct.c)
*/
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
- return sysctl_ip_vs_conntrack;
+ return ipvs->sysctl_conntrack;
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
@@ -1226,7 +1221,7 @@ extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp);
#else
-static inline int ip_vs_conntrack_enabled(void)
+static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
return 0;
}
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 1acfb33..c4b1abf 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -61,13 +61,46 @@ struct netns_ipvs {
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
+
/* ip_vs_ctl */
struct ip_vs_stats *tot_stats; /* Statistics & est. */
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
- /* ip_vs_conn */
- atomic_t conn_count; /* connection counter */
+ int num_services; /* no of virtual services */
+ /* 1/rate drop and drop-entry variables */
+ int drop_rate;
+ int drop_counter;
+ atomic_t dropentry;
+ /* locks in ctl.c */
+ spinlock_t dropentry_lock; /* drop entry handling */
+ spinlock_t droppacket_lock; /* drop packet handling */
+ spinlock_t securetcp_lock; /* state and timeout tables */
+ rwlock_t rs_lock; /* real services table */
+ /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
+ struct lock_class_key ctl_key; /* ctl_mutex debuging */
+ /* sys-ctl struct */
+ struct ctl_table_header *sysctl_hdr;
+ struct ctl_table *sysctl_tbl;
+ /* sysctl variables */
+ int sysctl_amemthresh;
+ int sysctl_am_droprate;
+ int sysctl_drop_entry;
+ int sysctl_drop_packet;
+ int sysctl_secure_tcp;
+#ifdef CONFIG_IP_VS_NFCT
+ int sysctl_conntrack;
+#endif
+ int sysctl_snat_reroute;
+ int sysctl_sync_ver;
+ int sysctl_cache_bypass;
+ int sysctl_expire_nodest_conn;
+ int sysctl_expire_quiescent_template;
+ int sysctl_sync_threshold[2];
+ int sysctl_nat_icmp_send;
+
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 0d5e4fe..5ba205a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -686,13 +686,14 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
int ip_vs_check_template(struct ip_vs_conn *ct)
{
struct ip_vs_dest *dest = ct->dest;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(ct));
/*
* Checking the dest server status.
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- (sysctl_ip_vs_expire_quiescent_template &&
+ (ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
@@ -879,7 +880,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* IP_VS_CONN_F_ONE_PACKET too.
*/
- if (ip_vs_conntrack_enabled())
+ if (ip_vs_conntrack_enabled(ipvs))
cp->flags |= IP_VS_CONN_F_NFCT;
/* Hash it in the ip_vs_conn_tab finally */
@@ -1198,7 +1199,7 @@ static void ip_vs_conn_flush(struct net *net)
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs = net_ipvs(net);
- flush_again:
+flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
/*
* Lock is actually needed in this loop.
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7205b49..a7c59a7 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -499,6 +499,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
+ struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
@@ -521,7 +522,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
/* if it is fwmark-based service, the cache_bypass sysctl is up
and the destination is a non-local unicast, then create
a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) {
+ ipvs = net_ipvs(skb_net(skb));
+ if (ipvs->sysctl_cache_bypass && svc->fwmark && unicast) {
int ret, cs;
struct ip_vs_conn *cp;
unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET &&
@@ -733,6 +735,7 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
+ struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
@@ -754,6 +757,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
@@ -763,11 +768,11 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
@@ -979,6 +984,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
+ struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
@@ -1014,13 +1020,15 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
+ ipvs = net_ipvs(skb_net(skb));
+
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (sysctl_ip_vs_snat_reroute && ip6_route_me_harder(skb) != 0)
+ if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
- if ((sysctl_ip_vs_snat_reroute ||
+ if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
@@ -1057,6 +1065,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs;
EnterFunction(11);
@@ -1131,10 +1140,11 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
+ ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
- if (sysctl_ip_vs_nat_icmp_send &&
+ if (ipvs->sysctl_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
@@ -1580,7 +1590,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
- if (sysctl_ip_vs_expire_nodest_conn) {
+ if (ipvs->sysctl_expire_nodest_conn) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
@@ -1610,15 +1620,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
- pkts = sysctl_ip_vs_sync_threshold[0];
+ pkts = ipvs->sysctl_sync_threshold[0];
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
@@ -1632,8 +1642,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- (pkts % sysctl_ip_vs_sync_threshold[1]
- == sysctl_ip_vs_sync_threshold[0])) ||
+ (pkts % ipvs->sysctl_sync_threshold[1]
+ == ipvs->sysctl_sync_threshold[0])) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index cbd58c6..183ac18 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -58,42 +58,7 @@ static DEFINE_MUTEX(__ip_vs_mutex);
/* lock for service table */
static DEFINE_RWLOCK(__ip_vs_svc_lock);
-/* lock for table with the real services */
-static DEFINE_RWLOCK(__ip_vs_rs_lock);
-
-/* lock for state and timeout tables */
-static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
-
-/* lock for drop entry handling */
-static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
-
-/* lock for drop packet handling */
-static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
-
-/* 1/rate drop and drop-entry variables */
-int ip_vs_drop_rate = 0;
-int ip_vs_drop_counter = 0;
-static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
-
-/* number of virtual services */
-static int ip_vs_num_services = 0;
-
/* sysctl variables */
-static int sysctl_ip_vs_drop_entry = 0;
-static int sysctl_ip_vs_drop_packet = 0;
-static int sysctl_ip_vs_secure_tcp = 0;
-static int sysctl_ip_vs_amemthresh = 1024;
-static int sysctl_ip_vs_am_droprate = 10;
-int sysctl_ip_vs_cache_bypass = 0;
-int sysctl_ip_vs_expire_nodest_conn = 0;
-int sysctl_ip_vs_expire_quiescent_template = 0;
-int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
-int sysctl_ip_vs_nat_icmp_send = 0;
-#ifdef CONFIG_IP_VS_NFCT
-int sysctl_ip_vs_conntrack;
-#endif
-int sysctl_ip_vs_snat_reroute = 1;
-int sysctl_ip_vs_sync_ver = 1; /* Default version of sync proto */
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;
@@ -142,73 +107,73 @@ static void update_defense_level(struct netns_ipvs *ipvs)
/* si_swapinfo(&i); */
/* availmem = availmem - (i.totalswap - i.freeswap); */
- nomem = (availmem < sysctl_ip_vs_amemthresh);
+ nomem = (availmem < ipvs->sysctl_amemthresh);
local_bh_disable();
/* drop_entry */
- spin_lock(&__ip_vs_dropentry_lock);
- switch (sysctl_ip_vs_drop_entry) {
+ spin_lock(&ipvs->dropentry_lock);
+ switch (ipvs->sysctl_drop_entry) {
case 0:
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
break;
case 1:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
- sysctl_ip_vs_drop_entry = 2;
+ atomic_set(&ipvs->dropentry, 1);
+ ipvs->sysctl_drop_entry = 2;
} else {
- atomic_set(&ip_vs_dropentry, 0);
+ atomic_set(&ipvs->dropentry, 0);
}
break;
case 2:
if (nomem) {
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
} else {
- atomic_set(&ip_vs_dropentry, 0);
- sysctl_ip_vs_drop_entry = 1;
+ atomic_set(&ipvs->dropentry, 0);
+ ipvs->sysctl_drop_entry = 1;
};
break;
case 3:
- atomic_set(&ip_vs_dropentry, 1);
+ atomic_set(&ipvs->dropentry, 1);
break;
}
- spin_unlock(&__ip_vs_dropentry_lock);
+ spin_unlock(&ipvs->dropentry_lock);
/* drop_packet */
- spin_lock(&__ip_vs_droppacket_lock);
- switch (sysctl_ip_vs_drop_packet) {
+ spin_lock(&ipvs->droppacket_lock);
+ switch (ipvs->sysctl_drop_packet) {
case 0:
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
break;
case 1:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
- sysctl_ip_vs_drop_packet = 2;
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
+ ipvs->sysctl_drop_packet = 2;
} else {
- ip_vs_drop_rate = 0;
+ ipvs->drop_rate = 0;
}
break;
case 2:
if (nomem) {
- ip_vs_drop_rate = ip_vs_drop_counter
- = sysctl_ip_vs_amemthresh /
- (sysctl_ip_vs_amemthresh-availmem);
+ ipvs->drop_rate = ipvs->drop_counter
+ = ipvs->sysctl_amemthresh /
+ (ipvs->sysctl_amemthresh-availmem);
} else {
- ip_vs_drop_rate = 0;
- sysctl_ip_vs_drop_packet = 1;
+ ipvs->drop_rate = 0;
+ ipvs->sysctl_drop_packet = 1;
}
break;
case 3:
- ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
+ ipvs->drop_rate = ipvs->sysctl_am_droprate;
break;
}
- spin_unlock(&__ip_vs_droppacket_lock);
+ spin_unlock(&ipvs->droppacket_lock);
/* secure_tcp */
- spin_lock(&ip_vs_securetcp_lock);
- switch (sysctl_ip_vs_secure_tcp) {
+ spin_lock(&ipvs->securetcp_lock);
+ switch (ipvs->sysctl_secure_tcp) {
case 0:
if (old_secure_tcp >= 2)
to_change = 0;
@@ -217,7 +182,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
if (nomem) {
if (old_secure_tcp < 2)
to_change = 1;
- sysctl_ip_vs_secure_tcp = 2;
+ ipvs->sysctl_secure_tcp = 2;
} else {
if (old_secure_tcp >= 2)
to_change = 0;
@@ -230,7 +195,7 @@ static void update_defense_level(struct netns_ipvs *ipvs)
} else {
if (old_secure_tcp >= 2)
to_change = 0;
- sysctl_ip_vs_secure_tcp = 1;
+ ipvs->sysctl_secure_tcp = 1;
}
break;
case 3:
@@ -238,11 +203,11 @@ static void update_defense_level(struct netns_ipvs *ipvs)
to_change = 1;
break;
}
- old_secure_tcp = sysctl_ip_vs_secure_tcp;
+ old_secure_tcp = ipvs->sysctl_secure_tcp;
if (to_change >= 0)
ip_vs_protocol_timeout_change(ipvs,
- sysctl_ip_vs_secure_tcp > 1);
- spin_unlock(&ip_vs_securetcp_lock);
+ ipvs->sysctl_secure_tcp > 1);
+ spin_unlock(&ipvs->securetcp_lock);
local_bh_enable();
}
@@ -260,7 +225,7 @@ static void defense_work_handler(struct work_struct *work)
struct netns_ipvs *ipvs = net_ipvs(&init_net);
update_defense_level(ipvs);
- if (atomic_read(&ip_vs_dropentry))
+ if (atomic_read(&ipvs->dropentry))
ip_vs_random_dropentry();
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
@@ -602,7 +567,7 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
*/
hash = ip_vs_rs_hashkey(af, daddr, dport);
- read_lock(&__ip_vs_rs_lock);
+ read_lock(&ipvs->rs_lock);
list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
@@ -610,11 +575,11 @@ ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
&& ((dest->protocol == protocol) ||
dest->vfwmark)) {
/* HIT */
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return dest;
}
}
- read_unlock(&__ip_vs_rs_lock);
+ read_unlock(&ipvs->rs_lock);
return NULL;
}
@@ -788,9 +753,9 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
* Put the real service in rs_table if not present.
* For now only for NAT!
*/
- write_lock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_hash(ipvs, dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_unlock_bh(&ipvs->rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -1022,14 +987,16 @@ ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
*/
static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
ip_vs_kill_estimator(net, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
*/
- write_lock_bh(&__ip_vs_rs_lock);
+ write_lock_bh(&ipvs->rs_lock);
ip_vs_rs_unhash(dest);
- write_unlock_bh(&__ip_vs_rs_lock);
+ write_unlock_bh(&ipvs->rs_lock);
/*
* Decrease the refcnt of the dest, and free the dest
@@ -1092,7 +1059,6 @@ static int
ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
{
struct ip_vs_dest *dest;
- struct net *net = svc->net;
__be16 dport = udest->port;
EnterFunction(2);
@@ -1121,7 +1087,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
/*
* Delete the destination
*/
- __ip_vs_del_dest(net, dest);
+ __ip_vs_del_dest(svc->net, dest);
LeaveFunction(2);
@@ -1140,6 +1106,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_scheduler *sched = NULL;
struct ip_vs_pe *pe = NULL;
struct ip_vs_service *svc = NULL;
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1219,7 +1186,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services++;
+ ipvs->num_services++;
/* Hash the service into the service table */
write_lock_bh(&__ip_vs_svc_lock);
@@ -1359,12 +1326,13 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
struct ip_vs_dest *dest, *nxt;
struct ip_vs_scheduler *old_sched;
struct ip_vs_pe *old_pe;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
pr_info("%s: enter\n", __func__);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
- ip_vs_num_services--;
+ ipvs->num_services--;
ip_vs_kill_estimator(svc->net, &svc->stats);
@@ -1589,42 +1557,31 @@ proc_do_sync_mode(ctl_table *table, int write,
/*
* IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
+ * Do not change order or insert new entries without
+ * align with netns init in __ip_vs_control_init()
*/
static struct ctl_table vs_vars[] = {
{
.procname = "amemthresh",
- .data = &sysctl_ip_vs_amemthresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
-#ifdef CONFIG_IP_VS_DEBUG
- {
- .procname = "debug_level",
- .data = &sysctl_ip_vs_debug_level,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
-#endif
{
.procname = "am_droprate",
- .data = &sysctl_ip_vs_am_droprate,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.procname = "drop_entry",
- .data = &sysctl_ip_vs_drop_entry,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "drop_packet",
- .data = &sysctl_ip_vs_drop_packet,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
@@ -1632,7 +1589,6 @@ static struct ctl_table vs_vars[] = {
#ifdef CONFIG_IP_VS_NFCT
{
.procname = "conntrack",
- .data = &sysctl_ip_vs_conntrack,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
@@ -1640,25 +1596,62 @@ static struct ctl_table vs_vars[] = {
#endif
{
.procname = "secure_tcp",
- .data = &sysctl_ip_vs_secure_tcp,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_do_defense_mode,
},
{
.procname = "snat_reroute",
- .data = &sysctl_ip_vs_snat_reroute,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_dointvec,
},
{
.procname = "sync_version",
- .data = &sysctl_ip_vs_sync_ver,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = &proc_do_sync_mode,
},
+ {
+ .procname = "cache_bypass",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_nodest_conn",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "expire_quiescent_template",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "sync_threshold",
+ .maxlen =
+ sizeof(((struct netns_ipvs *)0)->sysctl_sync_threshold),
+ .mode = 0644,
+ .proc_handler = proc_do_sync_threshold,
+ },
+ {
+ .procname = "nat_icmp_send",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_IP_VS_DEBUG
+ {
+ .procname = "debug_level",
+ .data = &sysctl_ip_vs_debug_level,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
#if 0
{
.procname = "timeout_established",
@@ -1745,41 +1738,6 @@ static struct ctl_table vs_vars[] = {
.proc_handler = proc_dointvec_jiffies,
},
#endif
- {
- .procname = "cache_bypass",
- .data = &sysctl_ip_vs_cache_bypass,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_nodest_conn",
- .data = &sysctl_ip_vs_expire_nodest_conn,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "expire_quiescent_template",
- .data = &sysctl_ip_vs_expire_quiescent_template,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "sync_threshold",
- .data = &sysctl_ip_vs_sync_threshold,
- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
- .mode = 0644,
- .proc_handler = proc_do_sync_threshold,
- },
- {
- .procname = "nat_icmp_send",
- .data = &sysctl_ip_vs_nat_icmp_send,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
{ }
};
@@ -1791,8 +1749,6 @@ const struct ctl_path net_vs_ctl_path[] = {
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
-static struct ctl_table_header * sysctl_header;
-
#ifdef CONFIG_PROC_FS
struct ip_vs_iter {
@@ -2543,7 +2499,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_getinfo info;
info.version = IP_VS_VERSION_CODE;
info.size = ip_vs_conn_tab_size;
- info.num_services = ip_vs_num_services;
+ info.num_services = ipvs->num_services;
if (copy_to_user(user, &info, sizeof(info)) != 0)
ret = -EFAULT;
}
@@ -3014,7 +2970,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
- struct net *net;
+ struct net *net = skb_sknet(skb);
mutex_lock(&__ip_vs_mutex);
@@ -3023,7 +2979,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
- net = skb_sknet(skb);
+
svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3215,8 +3171,10 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
struct net *net;
+ struct netns_ipvs *ipvs;
net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
@@ -3326,8 +3284,10 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
void *reply;
int ret, cmd, reply_cmd;
struct net *net;
+ struct netns_ipvs *ipvs;
net = skb_sknet(skb);
+ ipvs = net_ipvs(net);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3530,9 +3490,21 @@ int __net_init __ip_vs_control_init(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ctl_table *tbl;
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+
+ atomic_set(&ipvs->dropentry, 0);
+ spin_lock_init(&ipvs->dropentry_lock);
+ spin_lock_init(&ipvs->droppacket_lock);
+ spin_lock_init(&ipvs->securetcp_lock);
+ ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
+
+ /* Initialize rs_table */
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
/* procfs stats */
ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
if (ipvs->tot_stats == NULL) {
@@ -3553,14 +3525,51 @@ int __net_init __ip_vs_control_init(struct net *net)
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
&ip_vs_stats_percpu_fops);
- sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
+
+ if (!net_eq(net, &init_net)) {
+ tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
+ if (tbl == NULL)
+ goto err_dup;
+ } else
+ tbl = vs_vars;
+ /* Initialize sysctl defaults */
+ idx = 0;
+ ipvs->sysctl_amemthresh = 1024;
+ tbl[idx++].data = &ipvs->sysctl_amemthresh;
+ ipvs->sysctl_am_droprate = 10;
+ tbl[idx++].data = &ipvs->sysctl_am_droprate;
+ tbl[idx++].data = &ipvs->sysctl_drop_entry;
+ tbl[idx++].data = &ipvs->sysctl_drop_packet;
+#ifdef CONFIG_IP_VS_NFCT
+ tbl[idx++].data = &ipvs->sysctl_conntrack;
+#endif
+ tbl[idx++].data = &ipvs->sysctl_secure_tcp;
+ ipvs->sysctl_snat_reroute = 1;
+ tbl[idx++].data = &ipvs->sysctl_snat_reroute;
+ ipvs->sysctl_sync_ver = 1;
+ tbl[idx++].data = &ipvs->sysctl_sync_ver;
+ tbl[idx++].data = &ipvs->sysctl_cache_bypass;
+ tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
+ tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
+ ipvs->sysctl_sync_threshold[0] = 3;
+ ipvs->sysctl_sync_threshold[1] = 50;
+ tbl[idx].data = &ipvs->sysctl_sync_threshold;
+ tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
+ tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
+
+
+ ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
vs_vars);
- if (sysctl_header == NULL)
+ if (ipvs->sysctl_hdr == NULL)
goto err_reg;
ip_vs_new_estimator(net, ipvs->tot_stats);
+ ipvs->sysctl_tbl = tbl;
return 0;
err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(tbl);
+err_dup:
free_percpu(ipvs->cpustats);
err_alloc:
kfree(ipvs->tot_stats);
@@ -3575,7 +3584,7 @@ static void __net_exit __ip_vs_control_cleanup(struct net *net)
return;
ip_vs_kill_estimator(net, ipvs->tot_stats);
- unregister_net_sysctl_table(sysctl_header);
+ unregister_net_sysctl_table(ipvs->sysctl_hdr);
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 550365a..fb2d04a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -34,7 +34,7 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
&iph.daddr, sh->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d8b3f9f..c0cc341 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -54,7 +54,7 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
&iph.daddr, th->dest))) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 581157b..f1282cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -50,7 +50,7 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
if (svc) {
int ignored;
- if (ip_vs_todrop()) {
+ if (ip_vs_todrop(net_ipvs(net))) {
/*
* It seems that we are very loaded.
* We have to drop this packet :(
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index f85e47d..b178056 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
if (!ipvs->sync_state & IP_VS_STATE_MASTER)
return;
- if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff)
+ if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
- if (sysctl_ip_vs_sync_ver == 0) {
+ if (ipvs->sysctl_sync_ver == 0) {
ip_vs_sync_conn_v0(net, cp);
return;
}
@@ -650,7 +650,7 @@ control:
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
- if (pkts % sysctl_ip_vs_sync_threshold[1] != 1)
+ if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
return;
}
goto sloop;
@@ -724,6 +724,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(param);
@@ -794,7 +795,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
+ atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
cp->state = state;
cp->old_state = cp->state;
/*
--
1.7.2.3
^ permalink raw reply related
* [PATCH 49/79] IPVS: netns, connection hash got net as param.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Connection hash table is now name space aware.
i.e. net ptr >> 8 is xor:ed to the hash,
and this is the first param to be compared.
The net struct is 0xa40 in size ( a little bit smaller for 32 bit arch:s)
and cache-line aligned, so a ptr >> 5 might be a more clever solution ?
All lookups where net is compared uses net_eq() which returns 1 when netns
is disabled, and the compiler seems to do something clever in that case.
ip_vs_conn_fill_param() have *net as first param now.
Three new inlines added to keep conn struct smaller
when names space is disabled.
- ip_vs_conn_net()
- ip_vs_conn_net_set()
- ip_vs_conn_net_eq()
*v3
moved net compare to the end in "fast path"
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 53 ++++++++++++---
include/net/netns/ip_vs.h | 2 +
net/netfilter/ipvs/ip_vs_conn.c | 112 +++++++++++++++++++------------
net/netfilter/ipvs/ip_vs_core.c | 15 +++--
net/netfilter/ipvs/ip_vs_ftp.c | 14 ++--
net/netfilter/ipvs/ip_vs_nfct.c | 6 +-
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 15 +++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 2 +-
net/netfilter/ipvs/ip_vs_proto_udp.c | 2 +-
net/netfilter/ipvs/ip_vs_sync.c | 13 ++--
11 files changed, 153 insertions(+), 83 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 605d5db..f82c0ff 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -477,6 +477,7 @@ extern struct ip_vs_proto_data *ip_vs_proto_data_get(struct net *net,
unsigned short proto);
struct ip_vs_conn_param {
+ struct net *net;
const union nf_inet_addr *caddr;
const union nf_inet_addr *vaddr;
__be16 cport;
@@ -494,17 +495,19 @@ struct ip_vs_conn_param {
*/
struct ip_vs_conn {
struct list_head c_list; /* hashed list heads */
-
+#ifdef CONFIG_NET_NS
+ struct net *net; /* Name space */
+#endif
/* Protocol, addresses and port numbers */
- u16 af; /* address family */
- union nf_inet_addr caddr; /* client address */
- union nf_inet_addr vaddr; /* virtual address */
- union nf_inet_addr daddr; /* destination address */
- volatile __u32 flags; /* status flags */
- __u32 fwmark; /* Fire wall mark from skb */
- __be16 cport;
- __be16 vport;
- __be16 dport;
+ u16 af; /* address family */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __u32 fwmark; /* Fire wall mark from skb */
+ union nf_inet_addr caddr; /* client address */
+ union nf_inet_addr vaddr; /* virtual address */
+ union nf_inet_addr daddr; /* destination address */
+ volatile __u32 flags; /* status flags */
__u16 protocol; /* Which protocol (TCP/UDP) */
/* counter and timer */
@@ -547,6 +550,33 @@ struct ip_vs_conn {
__u8 pe_data_len;
};
+/*
+ * To save some memory in conn table when name space is disabled.
+ */
+static inline struct net *ip_vs_conn_net(const struct ip_vs_conn *cp)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net;
+#else
+ return &init_net;
+#endif
+}
+static inline void ip_vs_conn_net_set(struct ip_vs_conn *cp, struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ cp->net = net;
+#endif
+}
+
+static inline int ip_vs_conn_net_eq(const struct ip_vs_conn *cp,
+ struct net *net)
+{
+#ifdef CONFIG_NET_NS
+ return cp->net == net;
+#else
+ return 1;
+#endif
+}
/*
* Extended internal versions of struct ip_vs_service_user and
@@ -796,13 +826,14 @@ enum {
IP_VS_DIR_LAST,
};
-static inline void ip_vs_conn_fill_param(int af, int protocol,
+static inline void ip_vs_conn_fill_param(struct net *net, int af, int protocol,
const union nf_inet_addr *caddr,
__be16 cport,
const union nf_inet_addr *vaddr,
__be16 vport,
struct ip_vs_conn_param *p)
{
+ p->net = net;
p->af = af;
p->protocol = protocol;
p->caddr = caddr;
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index bd1dad8..1acfb33 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -66,6 +66,8 @@ struct netns_ipvs {
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
+ /* ip_vs_conn */
+ atomic_t conn_count; /* connection counter */
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index b2024c9..0d5e4fe 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -64,9 +64,6 @@ static struct list_head *ip_vs_conn_tab __read_mostly;
/* SLAB cache for IPVS connections */
static struct kmem_cache *ip_vs_conn_cachep __read_mostly;
-/* counter for current IPVS connections */
-static atomic_t ip_vs_conn_count = ATOMIC_INIT(0);
-
/* counter for no client port connections */
static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0);
@@ -76,7 +73,7 @@ static unsigned int ip_vs_conn_rnd __read_mostly;
/*
* Fine locking granularity for big connection hash table
*/
-#define CT_LOCKARRAY_BITS 4
+#define CT_LOCKARRAY_BITS 5
#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS)
#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1)
@@ -133,19 +130,19 @@ static inline void ct_write_unlock_bh(unsigned key)
/*
* Returns hash value for IPVS connection entry
*/
-static unsigned int ip_vs_conn_hashkey(int af, unsigned proto,
+static unsigned int ip_vs_conn_hashkey(struct net *net, int af, unsigned proto,
const union nf_inet_addr *addr,
__be16 port)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- return jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
- (__force u32)port, proto, ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd),
+ (__force u32)port, proto, ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
#endif
- return jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
- ip_vs_conn_rnd)
- & ip_vs_conn_tab_mask;
+ return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto,
+ ip_vs_conn_rnd) ^
+ ((size_t)net>>8)) & ip_vs_conn_tab_mask;
}
static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
@@ -166,15 +163,15 @@ static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p,
port = p->vport;
}
- return ip_vs_conn_hashkey(p->af, p->protocol, addr, port);
+ return ip_vs_conn_hashkey(p->net, p->af, p->protocol, addr, port);
}
static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport,
- NULL, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), cp->af, cp->protocol,
+ &cp->caddr, cp->cport, NULL, 0, &p);
if (cp->pe) {
p.pe = cp->pe;
@@ -186,7 +183,7 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp)
}
/*
- * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port.
+ * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port.
* returns bool success.
*/
static inline int ip_vs_conn_hash(struct ip_vs_conn *cp)
@@ -269,11 +266,12 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->cport == cp->cport && p->vport == cp->vport &&
ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) &&
- p->cport == cp->cport && p->vport == cp->vport &&
((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ct_read_unlock(hash);
@@ -313,17 +311,18 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn_param *p)
{
__be16 _ports[2], *pptr;
+ struct net *net = skb_net(skb);
pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports);
if (pptr == NULL)
return 1;
if (likely(!inverse))
- ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0],
- &iph->daddr, pptr[1], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->saddr,
+ pptr[0], &iph->daddr, pptr[1], p);
else
- ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1],
- &iph->saddr, pptr[0], p);
+ ip_vs_conn_fill_param(net, af, iph->protocol, &iph->daddr,
+ pptr[1], &iph->saddr, pptr[0], p);
return 0;
}
@@ -352,6 +351,8 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p)
ct_read_lock(hash);
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
+ if (!ip_vs_conn_net_eq(cp, p->net))
+ continue;
if (p->pe_data && p->pe->ct_match) {
if (p->pe == cp->pe && p->pe->ct_match(p, cp))
goto out;
@@ -403,10 +404,11 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) {
if (cp->af == p->af &&
+ p->vport == cp->cport && p->cport == cp->dport &&
ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) &&
ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) &&
- p->vport == cp->cport && p->cport == cp->dport &&
- p->protocol == cp->protocol) {
+ p->protocol == cp->protocol &&
+ ip_vs_conn_net_eq(cp, p->net)) {
/* HIT */
atomic_inc(&cp->refcnt);
ret = cp;
@@ -609,8 +611,8 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport,
- &cp->vaddr, cp->vport,
+ dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,
+ cp->dport, &cp->vaddr, cp->vport,
cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
return dest;
@@ -728,6 +730,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
static void ip_vs_conn_expire(unsigned long data)
{
struct ip_vs_conn *cp = (struct ip_vs_conn *)data;
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
cp->timeout = 60*HZ;
@@ -770,7 +773,7 @@ static void ip_vs_conn_expire(unsigned long data)
ip_vs_unbind_dest(cp);
if (cp->flags & IP_VS_CONN_F_NO_CPORT)
atomic_dec(&ip_vs_conn_no_cport_cnt);
- atomic_dec(&ip_vs_conn_count);
+ atomic_dec(&ipvs->conn_count);
kmem_cache_free(ip_vs_conn_cachep, cp);
return;
@@ -804,7 +807,9 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol);
+ struct netns_ipvs *ipvs = net_ipvs(p->net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(p->net,
+ p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@@ -814,6 +819,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
INIT_LIST_HEAD(&cp->c_list);
setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
+ ip_vs_conn_net_set(cp, p->net);
cp->af = p->af;
cp->protocol = p->protocol;
ip_vs_addr_copy(p->af, &cp->caddr, p->caddr);
@@ -844,7 +850,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
atomic_set(&cp->n_control, 0);
atomic_set(&cp->in_pkts, 0);
- atomic_inc(&ip_vs_conn_count);
+ atomic_inc(&ipvs->conn_count);
if (flags & IP_VS_CONN_F_NO_CPORT)
atomic_inc(&ip_vs_conn_no_cport_cnt);
@@ -886,17 +892,22 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
* /proc/net/ip_vs_conn entries
*/
#ifdef CONFIG_PROC_FS
+struct ip_vs_iter_state {
+ struct seq_net_private p;
+ struct list_head *l;
+};
static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
{
int idx;
struct ip_vs_conn *cp;
+ struct ip_vs_iter_state *iter = seq->private;
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
if (pos-- == 0) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
}
@@ -908,14 +919,17 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos)
static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos)
{
- seq->private = NULL;
+ struct ip_vs_iter_state *iter = seq->private;
+
+ iter->l = NULL;
return *pos ? ip_vs_conn_array(seq, *pos - 1) :SEQ_START_TOKEN;
}
static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_conn *cp = v;
- struct list_head *e, *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *e, *l = iter->l;
int idx;
++*pos;
@@ -932,18 +946,19 @@ static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos)
while (++idx < ip_vs_conn_tab_size) {
ct_read_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- seq->private = &ip_vs_conn_tab[idx];
+ iter->l = &ip_vs_conn_tab[idx];
return cp;
}
ct_read_unlock_bh(idx);
}
- seq->private = NULL;
+ iter->l = NULL;
return NULL;
}
static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v)
{
- struct list_head *l = seq->private;
+ struct ip_vs_iter_state *iter = seq->private;
+ struct list_head *l = iter->l;
if (l)
ct_read_unlock_bh(l - ip_vs_conn_tab);
@@ -957,9 +972,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3];
size_t len = 0;
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
if (cp->pe_data) {
pe_data[0] = ' ';
len = strlen(cp->pe->name);
@@ -1004,7 +1022,8 @@ static const struct seq_operations ip_vs_conn_seq_ops = {
static int ip_vs_conn_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_fops = {
@@ -1031,6 +1050,10 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
"Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
else {
const struct ip_vs_conn *cp = v;
+ struct net *net = seq_file_net(seq);
+
+ if (!ip_vs_conn_net_eq(cp, net))
+ return 0;
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -1067,7 +1090,8 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = {
static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_conn_sync_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_conn_sync_seq_ops,
+ sizeof(struct ip_vs_iter_state));
}
static const struct file_operations ip_vs_conn_sync_fops = {
@@ -1168,10 +1192,11 @@ void ip_vs_random_dropentry(void)
/*
* Flush all the connection entries in the ip_vs_conn_tab
*/
-static void ip_vs_conn_flush(void)
+static void ip_vs_conn_flush(struct net *net)
{
int idx;
struct ip_vs_conn *cp;
+ struct netns_ipvs *ipvs = net_ipvs(net);
flush_again:
for (idx = 0; idx < ip_vs_conn_tab_size; idx++) {
@@ -1181,7 +1206,8 @@ static void ip_vs_conn_flush(void)
ct_write_lock_bh(idx);
list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
-
+ if (!ip_vs_conn_net_eq(cp, net))
+ continue;
IP_VS_DBG(4, "del connection\n");
ip_vs_conn_expire_now(cp);
if (cp->control) {
@@ -1194,7 +1220,7 @@ static void ip_vs_conn_flush(void)
/* the counter may be not NULL, because maybe some conn entries
are run by slow timer handler or unhashed but still referred */
- if (atomic_read(&ip_vs_conn_count) != 0) {
+ if (atomic_read(&ipvs->conn_count) != 0) {
schedule();
goto flush_again;
}
@@ -1204,8 +1230,11 @@ static void ip_vs_conn_flush(void)
*/
int __net_init __ip_vs_conn_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+ atomic_set(&ipvs->conn_count, 0);
proc_net_fops_create(net, "ip_vs_conn", 0, &ip_vs_conn_fops);
proc_net_fops_create(net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
@@ -1217,6 +1246,8 @@ static void __net_exit __ip_vs_conn_cleanup(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return;
+ /* flush all the connection entries first */
+ ip_vs_conn_flush(net);
proc_net_remove(net, "ip_vs_conn");
proc_net_remove(net, "ip_vs_conn_sync");
}
@@ -1277,9 +1308,6 @@ int __init ip_vs_conn_init(void)
void ip_vs_conn_cleanup(void)
{
unregister_pernet_subsys(&ipvs_conn_ops);
- /* flush all the connection entries first */
- ip_vs_conn_flush();
-
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
vfree(ip_vs_conn_tab);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 7e6a2a0..7205b49 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -205,7 +205,8 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
const union nf_inet_addr *vaddr, __be16 vport,
struct ip_vs_conn_param *p)
{
- ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p);
+ ip_vs_conn_fill_param(svc->net, svc->af, protocol, caddr, cport, vaddr,
+ vport, p);
p->pe = svc->pe;
if (p->pe && p->pe->fill_param)
return p->pe->fill_param(p, skb);
@@ -348,8 +349,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
- &iph.daddr, dst_port, ¶m);
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol, &iph.saddr,
+ src_port, &iph.daddr, dst_port, ¶m);
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
@@ -464,8 +465,10 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr,
- pptr[0], &iph.daddr, pptr[1], &p);
+
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
+ &iph.saddr, pptr[0], &iph.daddr, pptr[1],
+ &p);
cp = ip_vs_conn_new(&p, &dest->addr,
dest->port ? dest->port : pptr[1],
flags, dest, skb->mark);
@@ -532,7 +535,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__);
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(svc->af, iph.protocol,
+ ip_vs_conn_fill_param(svc->net, svc->af, iph.protocol,
&iph.saddr, pptr[0],
&iph.daddr, pptr[1], &p);
cp = ip_vs_conn_new(&p, &daddr, 0,
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 77b0036..6a04f9a 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -198,13 +198,15 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
*/
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol,
- &from, port, &cp->caddr, 0, &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &from, port,
+ &cp->caddr, 0, &p);
n_cp = ip_vs_conn_out_get(&p);
}
if (!n_cp) {
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr,
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp),
+ AF_INET, IPPROTO_TCP, &cp->caddr,
0, &cp->vaddr, port, &p);
n_cp = ip_vs_conn_new(&p, &from, port,
IP_VS_CONN_F_NO_CPORT |
@@ -361,9 +363,9 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
{
struct ip_vs_conn_param p;
- ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port,
- &cp->vaddr, htons(ntohs(cp->vport)-1),
- &p);
+ ip_vs_conn_fill_param(ip_vs_conn_net(cp), AF_INET,
+ iph->protocol, &to, port, &cp->vaddr,
+ htons(ntohs(cp->vport)-1), &p);
n_cp = ip_vs_conn_in_get(&p);
if (!n_cp) {
n_cp = ip_vs_conn_new(&p, &cp->daddr,
diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c
index 4680647..f454c80 100644
--- a/net/netfilter/ipvs/ip_vs_nfct.c
+++ b/net/netfilter/ipvs/ip_vs_nfct.c
@@ -141,6 +141,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
struct nf_conntrack_tuple *orig, new_reply;
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = nf_ct_net(ct);
if (exp->tuple.src.l3num != PF_INET)
return;
@@ -155,7 +156,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
/* RS->CLIENT */
orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum,
+ ip_vs_conn_fill_param(net, exp->tuple.src.l3num, orig->dst.protonum,
&orig->src.u3, orig->src.u.tcp.port,
&orig->dst.u3, orig->dst.u.tcp.port, &p);
cp = ip_vs_conn_out_get(&p);
@@ -268,7 +269,8 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp)
" for conn " FMT_CONN "\n",
__func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
- h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
+ h = nf_conntrack_find_get(ip_vs_conn_net(cp), NF_CT_DEFAULT_ZONE,
+ &tuple);
if (h) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* Show what happens instead of calling nf_ct_kill() */
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 28039cb..5b8eb8b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -41,15 +41,16 @@ struct isakmp_hdr {
#define PORT_ISAKMP 500
static void
-ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
- int inverse, struct ip_vs_conn_param *p)
+ah_esp_conn_fill_param_proto(struct net *net, int af,
+ const struct ip_vs_iphdr *iph, int inverse,
+ struct ip_vs_conn_param *p)
{
if (likely(!inverse))
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->saddr, htons(PORT_ISAKMP),
&iph->daddr, htons(PORT_ISAKMP), p);
else
- ip_vs_conn_fill_param(af, IPPROTO_UDP,
+ ip_vs_conn_fill_param(net, af, IPPROTO_UDP,
&iph->daddr, htons(PORT_ISAKMP),
&iph->saddr, htons(PORT_ISAKMP), p);
}
@@ -61,8 +62,9 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb,
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_in_get(&p);
if (!cp) {
/*
@@ -89,8 +91,9 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
{
struct ip_vs_conn *cp;
struct ip_vs_conn_param p;
+ struct net *net = skb_net(skb);
- ah_esp_conn_fill_param_proto(af, iph, inverse, &p);
+ ah_esp_conn_fill_param_proto(net, af, iph, inverse, &p);
cp = ip_vs_conn_out_get(&p);
if (!cp) {
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 569e77b..550365a 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1055,7 +1055,7 @@ static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int sctp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 757aaaf..d8b3f9f 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -620,7 +620,7 @@ tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 1dc3941..581157b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -396,7 +396,7 @@ udp_unregister_app(struct net *net, struct ip_vs_app *inc)
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct netns_ipvs *ipvs = net_ipvs(ip_vs_conn_net(cp));
int hash;
struct ip_vs_app *inc;
int result = 0;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index c29e73d..f85e47d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -660,21 +660,21 @@ control:
* fill_param used by version 1
*/
static inline int
-ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
+ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
struct ip_vs_conn_param *p,
__u8 *pe_data, unsigned int pe_data_len,
__u8 *pe_name, unsigned int pe_name_len)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
- ip_vs_conn_fill_param(af, sc->v6.protocol,
+ ip_vs_conn_fill_param(net, af, sc->v6.protocol,
(const union nf_inet_addr *)&sc->v6.caddr,
sc->v6.cport,
(const union nf_inet_addr *)&sc->v6.vaddr,
sc->v6.vport, p);
else
#endif
- ip_vs_conn_fill_param(af, sc->v4.protocol,
+ ip_vs_conn_fill_param(net, af, sc->v4.protocol,
(const union nf_inet_addr *)&sc->v4.caddr,
sc->v4.cport,
(const union nf_inet_addr *)&sc->v4.vaddr,
@@ -881,7 +881,7 @@ static void ip_vs_process_message_v0(struct net *net, const char *buffer,
}
}
- ip_vs_conn_fill_param(AF_INET, s->protocol,
+ ip_vs_conn_fill_param(net, AF_INET, s->protocol,
(const union nf_inet_addr *)&s->caddr,
s->cport,
(const union nf_inet_addr *)&s->vaddr,
@@ -1043,9 +1043,8 @@ static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
state = 0;
}
}
- if (ip_vs_conn_fill_param_sync(af, s, ¶m,
- pe_data, pe_data_len,
- pe_name, pe_name_len)) {
+ if (ip_vs_conn_fill_param_sync(net, af, s, ¶m, pe_data,
+ pe_data_len, pe_name, pe_name_len)) {
retc = 50;
goto out;
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 47/79] IPVS: netns awareness to ip_vs_sync
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
All global variables moved to struct ipvs,
most external changes fixed (i.e. init_net removed)
in sync_buf create + 4 replaced by sizeof(struct..)
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 14 +-
include/net/netns/ip_vs.h | 16 ++
net/netfilter/ipvs/ip_vs_core.c | 15 +-
net/netfilter/ipvs/ip_vs_ctl.c | 52 ++++---
net/netfilter/ipvs/ip_vs_sync.c | 334 +++++++++++++++++++++------------------
5 files changed, 240 insertions(+), 191 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c08927b..4265b5e 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -958,7 +958,7 @@ extern struct ip_vs_stats ip_vs_stats;
extern const struct ctl_path net_vs_ctl_path[];
extern int sysctl_ip_vs_sync_ver;
-extern void ip_vs_sync_switch_mode(int mode);
+extern void ip_vs_sync_switch_mode(struct net *net, int mode);
extern struct ip_vs_service *
ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
@@ -987,14 +987,10 @@ extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
* IPVS sync daemon data and function prototypes
* (from ip_vs_sync.c)
*/
-extern volatile int ip_vs_sync_state;
-extern volatile int ip_vs_master_syncid;
-extern volatile int ip_vs_backup_syncid;
-extern char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-extern int start_sync_thread(int state, char *mcast_ifn, __u8 syncid);
-extern int stop_sync_thread(int state);
-extern void ip_vs_sync_conn(struct ip_vs_conn *cp);
+extern int start_sync_thread(struct net *net, int state, char *mcast_ifn,
+ __u8 syncid);
+extern int stop_sync_thread(struct net *net, int state);
+extern void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_sync_init(void);
extern void ip_vs_sync_cleanup(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index db02401..aba78f3 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -74,6 +74,22 @@ struct netns_ipvs {
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
+ /* ip_vs_sync */
+ struct list_head sync_queue;
+ spinlock_t sync_lock;
+ struct ip_vs_sync_buff *sync_buff;
+ spinlock_t sync_buff_lock;
+ struct sockaddr_in sync_mcast_addr;
+ struct task_struct *master_thread;
+ struct task_struct *backup_thread;
+ int send_mesg_maxlen;
+ int recv_mesg_maxlen;
+ volatile int sync_state;
+ volatile int master_syncid;
+ volatile int backup_syncid;
+ /* multicast interface name */
+ char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
+ char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 9317aff..5531d56 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1471,12 +1471,13 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
- struct net *net = NULL;
+ struct net *net;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
+ struct netns_ipvs *ipvs;
/* Already marked as IPVS request or reply? */
if (skb->ipvs_property)
@@ -1556,7 +1557,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet");
-
+ net = skb_net(skb);
+ ipvs = net_ipvs(net);
/* Check the server status */
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
@@ -1589,12 +1591,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*
* For ONE_PKT let ip_vs_sync_conn() do the filter work.
*/
+
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
pkts = sysctl_ip_vs_sync_threshold[0];
else
pkts = atomic_add_return(1, &cp->in_pkts);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1603,13 +1606,13 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
goto out;
}
}
/* Keep this block last: TCP and others with pp->num_states <= 1 */
- else if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+ else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % sysctl_ip_vs_sync_threshold[1]
@@ -1619,7 +1622,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
(cp->state == IP_VS_TCP_S_CLOSE) ||
(cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
(cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(cp);
+ ip_vs_sync_conn(net, cp);
out:
cp->old_state = cp->state;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index c89beb8..03f8631 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1559,7 +1559,8 @@ proc_do_sync_mode(ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- ip_vs_sync_switch_mode(val);
+ struct net *net = current->nsproxy->net_ns;
+ ip_vs_sync_switch_mode(net, val);
}
}
return rc;
@@ -2174,11 +2175,12 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
+ ret = start_sync_thread(net, dm->state, dm->mcast_ifn,
+ dm->syncid);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
- ret = stop_sync_thread(dm->state);
+ ret = stop_sync_thread(net, dm->state);
goto out_unlock;
}
@@ -2424,6 +2426,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
int ret = 0;
unsigned int copylen;
struct net *net = sock_net(sk);
+ struct netns_ipvs *ipvs = net_ipvs(net);
BUG_ON(!net);
if (!capable(CAP_NET_ADMIN))
@@ -2546,15 +2549,17 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
struct ip_vs_daemon_user d[2];
memset(&d, 0, sizeof(d));
- if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
+ if (ipvs->sync_state & IP_VS_STATE_MASTER) {
d[0].state = IP_VS_STATE_MASTER;
- strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
- d[0].syncid = ip_vs_master_syncid;
+ strlcpy(d[0].mcast_ifn, ipvs->master_mcast_ifn,
+ sizeof(d[0].mcast_ifn));
+ d[0].syncid = ipvs->master_syncid;
}
- if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
+ if (ipvs->sync_state & IP_VS_STATE_BACKUP) {
d[1].state = IP_VS_STATE_BACKUP;
- strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
- d[1].syncid = ip_vs_backup_syncid;
+ strlcpy(d[1].mcast_ifn, ipvs->backup_mcast_ifn,
+ sizeof(d[1].mcast_ifn));
+ d[1].syncid = ipvs->backup_syncid;
}
if (copy_to_user(user, &d, sizeof(d)) != 0)
ret = -EFAULT;
@@ -3061,20 +3066,23 @@ nla_put_failure:
static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
struct netlink_callback *cb)
{
+ struct net *net = skb_net(skb);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
mutex_lock(&__ip_vs_mutex);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
+ if ((ipvs->sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
- ip_vs_master_mcast_ifn,
- ip_vs_master_syncid, cb) < 0)
+ ipvs->master_mcast_ifn,
+ ipvs->master_syncid, cb) < 0)
goto nla_put_failure;
cb->args[0] = 1;
}
- if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
+ if ((ipvs->sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
- ip_vs_backup_mcast_ifn,
- ip_vs_backup_syncid, cb) < 0)
+ ipvs->backup_mcast_ifn,
+ ipvs->backup_syncid, cb) < 0)
goto nla_put_failure;
cb->args[1] = 1;
@@ -3086,24 +3094,26 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_new_daemon(struct nlattr **attrs)
+static int ip_vs_genl_new_daemon(struct net *net, struct nlattr **attrs)
{
if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
return -EINVAL;
- return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
+ return start_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
}
-static int ip_vs_genl_del_daemon(struct nlattr **attrs)
+static int ip_vs_genl_del_daemon(struct net *net, struct nlattr **attrs)
{
if (!attrs[IPVS_DAEMON_ATTR_STATE])
return -EINVAL;
- return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
+ return stop_sync_thread(net,
+ nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
@@ -3159,9 +3169,9 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
}
if (cmd == IPVS_CMD_NEW_DAEMON)
- ret = ip_vs_genl_new_daemon(daemon_attrs);
+ ret = ip_vs_genl_new_daemon(net, daemon_attrs);
else
- ret = ip_vs_genl_del_daemon(daemon_attrs);
+ ret = ip_vs_genl_del_daemon(net, daemon_attrs);
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 6831e8f..c29e73d 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -192,6 +192,7 @@ union ip_vs_sync_conn {
#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
struct ip_vs_sync_thread_data {
+ struct net *net;
struct socket *sock;
char *buf;
};
@@ -259,10 +260,6 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
-/* the maximum length of sync (sending/receiving) message */
-static int sync_send_mesg_maxlen;
-static int sync_recv_mesg_maxlen;
-
struct ip_vs_sync_buff {
struct list_head list;
unsigned long firstuse;
@@ -273,28 +270,6 @@ struct ip_vs_sync_buff {
unsigned char *end;
};
-
-/* the sync_buff list head and the lock */
-static LIST_HEAD(ip_vs_sync_queue);
-static DEFINE_SPINLOCK(ip_vs_sync_lock);
-
-/* current sync_buff for accepting new conn entries */
-static struct ip_vs_sync_buff *curr_sb = NULL;
-static DEFINE_SPINLOCK(curr_sb_lock);
-
-/* ipvs sync daemon state */
-volatile int ip_vs_sync_state = IP_VS_STATE_NONE;
-volatile int ip_vs_master_syncid = 0;
-volatile int ip_vs_backup_syncid = 0;
-
-/* multicast interface name */
-char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
-
-/* sync daemon tasks */
-static struct task_struct *sync_master_thread;
-static struct task_struct *sync_backup_thread;
-
/* multicast addr */
static struct sockaddr_in mcast_addr = {
.sin_family = AF_INET,
@@ -324,20 +299,20 @@ static void hton_seq(struct ip_vs_seq *ho, struct ip_vs_seq *no)
put_unaligned_be32(ho->previous_delta, &no->previous_delta);
}
-static inline struct ip_vs_sync_buff *sb_dequeue(void)
+static inline struct ip_vs_sync_buff *sb_dequeue(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&ip_vs_sync_lock);
- if (list_empty(&ip_vs_sync_queue)) {
+ spin_lock_bh(&ipvs->sync_lock);
+ if (list_empty(&ipvs->sync_queue)) {
sb = NULL;
} else {
- sb = list_entry(ip_vs_sync_queue.next,
+ sb = list_entry(ipvs->sync_queue.next,
struct ip_vs_sync_buff,
list);
list_del(&sb->list);
}
- spin_unlock_bh(&ip_vs_sync_lock);
+ spin_unlock_bh(&ipvs->sync_lock);
return sb;
}
@@ -345,25 +320,27 @@ static inline struct ip_vs_sync_buff *sb_dequeue(void)
/*
* Create a new sync buffer for Version 1 proto.
*/
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void)
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
kfree(sb);
return NULL;
}
sb->mesg->reserved = 0; /* old nr_conns i.e. must be zeo now */
sb->mesg->version = SYNC_PROTO_VER;
- sb->mesg->syncid = ip_vs_master_syncid;
+ sb->mesg->syncid = ipvs->master_syncid;
sb->mesg->size = sizeof(struct ip_vs_sync_mesg);
sb->mesg->nr_conns = 0;
sb->mesg->spare = 0;
sb->head = (unsigned char *)sb->mesg + sizeof(struct ip_vs_sync_mesg);
- sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen;
+ sb->end = (unsigned char *)sb->mesg + ipvs->send_mesg_maxlen;
sb->firstuse = jiffies;
return sb;
@@ -375,14 +352,16 @@ static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb)
kfree(sb);
}
-static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
+static inline void sb_queue_tail(struct netns_ipvs *ipvs)
{
- spin_lock(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&sb->list, &ip_vs_sync_queue);
+ struct ip_vs_sync_buff *sb = ipvs->sync_buff;
+
+ spin_lock(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&sb->list, &ipvs->sync_queue);
else
ip_vs_sync_buff_release(sb);
- spin_unlock(&ip_vs_sync_lock);
+ spin_unlock(&ipvs->sync_lock);
}
/*
@@ -390,18 +369,18 @@ static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
* than the specified time or the specified time is zero.
*/
static inline struct ip_vs_sync_buff *
-get_curr_sync_buff(unsigned long time)
+get_curr_sync_buff(struct netns_ipvs *ipvs, unsigned long time)
{
struct ip_vs_sync_buff *sb;
- spin_lock_bh(&curr_sb_lock);
- if (curr_sb && (time == 0 ||
- time_before(jiffies - curr_sb->firstuse, time))) {
- sb = curr_sb;
- curr_sb = NULL;
+ spin_lock_bh(&ipvs->sync_buff_lock);
+ if (ipvs->sync_buff && (time == 0 ||
+ time_before(jiffies - ipvs->sync_buff->firstuse, time))) {
+ sb = ipvs->sync_buff;
+ ipvs->sync_buff = NULL;
} else
sb = NULL;
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
return sb;
}
@@ -409,33 +388,37 @@ get_curr_sync_buff(unsigned long time)
* Switch mode from sending version 0 or 1
* - must handle sync_buf
*/
-void ip_vs_sync_switch_mode(int mode) {
+void ip_vs_sync_switch_mode(struct net *net, int mode)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
- if (!ip_vs_sync_state & IP_VS_STATE_MASTER)
+ if (!ipvs->sync_state & IP_VS_STATE_MASTER)
return;
- if (mode == sysctl_ip_vs_sync_ver || !curr_sb)
+ if (mode == sysctl_ip_vs_sync_ver || !ipvs->sync_buff)
return;
- spin_lock_bh(&curr_sb_lock);
+ spin_lock_bh(&ipvs->sync_buff_lock);
/* Buffer empty ? then let buf_create do the job */
- if ( curr_sb->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
- kfree(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->mesg->size <= sizeof(struct ip_vs_sync_mesg)) {
+ kfree(ipvs->sync_buff);
+ ipvs->sync_buff = NULL;
} else {
- spin_lock_bh(&ip_vs_sync_lock);
- if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- list_add_tail(&curr_sb->list, &ip_vs_sync_queue);
+ spin_lock_bh(&ipvs->sync_lock);
+ if (ipvs->sync_state & IP_VS_STATE_MASTER)
+ list_add_tail(&ipvs->sync_buff->list,
+ &ipvs->sync_queue);
else
- ip_vs_sync_buff_release(curr_sb);
- spin_unlock_bh(&ip_vs_sync_lock);
+ ip_vs_sync_buff_release(ipvs->sync_buff);
+ spin_unlock_bh(&ipvs->sync_lock);
}
- spin_unlock_bh(&curr_sb_lock);
+ spin_unlock_bh(&ipvs->sync_buff_lock);
}
/*
* Create a new sync buffer for Version 0 proto.
*/
-static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
+static inline struct ip_vs_sync_buff *
+ip_vs_sync_buff_create_v0(struct netns_ipvs *ipvs)
{
struct ip_vs_sync_buff *sb;
struct ip_vs_sync_mesg_v0 *mesg;
@@ -443,16 +426,17 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC)))
return NULL;
- if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) {
+ sb->mesg = kmalloc(ipvs->send_mesg_maxlen, GFP_ATOMIC);
+ if (!sb->mesg) {
kfree(sb);
return NULL;
}
mesg = (struct ip_vs_sync_mesg_v0 *)sb->mesg;
mesg->nr_conns = 0;
- mesg->syncid = ip_vs_master_syncid;
- mesg->size = 4;
- sb->head = (unsigned char *)mesg + 4;
- sb->end = (unsigned char *)mesg + sync_send_mesg_maxlen;
+ mesg->syncid = ipvs->master_syncid;
+ mesg->size = sizeof(struct ip_vs_sync_mesg_v0);
+ sb->head = (unsigned char *)mesg + sizeof(struct ip_vs_sync_mesg_v0);
+ sb->end = (unsigned char *)mesg + ipvs->send_mesg_maxlen;
sb->firstuse = jiffies;
return sb;
}
@@ -461,8 +445,9 @@ static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create_v0(void)
* Version 0 , could be switched in by sys_ctl.
* Add an ip_vs_conn information into the current sync_buff.
*/
-void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
+void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg_v0 *m;
struct ip_vs_sync_conn_v0 *s;
int len;
@@ -473,10 +458,12 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
return;
- spin_lock(&curr_sb_lock);
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create_v0())) {
- spin_unlock(&curr_sb_lock);
+ spin_lock(&ipvs->sync_buff_lock);
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff =
+ ip_vs_sync_buff_create_v0(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
@@ -484,8 +471,8 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
- m = (struct ip_vs_sync_mesg_v0 *)curr_sb->mesg;
- s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
+ m = (struct ip_vs_sync_mesg_v0 *)ipvs->sync_buff->mesg;
+ s = (struct ip_vs_sync_conn_v0 *)ipvs->sync_buff->head;
/* copy members */
s->reserved = 0;
@@ -506,18 +493,18 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
m->nr_conns++;
m->size += len;
- curr_sb->head += len;
+ ipvs->sync_buff->head += len;
/* check if there is a space for next one */
- if (curr_sb->head + FULL_CONN_SIZE > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff->head + FULL_CONN_SIZE > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
/* synchronize its controller if it has */
if (cp->control)
- ip_vs_sync_conn(cp->control);
+ ip_vs_sync_conn(net, cp->control);
}
/*
@@ -525,8 +512,9 @@ void ip_vs_sync_conn_v0(struct ip_vs_conn *cp)
* Called by ip_vs_in.
* Sending Version 1 messages
*/
-void ip_vs_sync_conn(struct ip_vs_conn *cp)
+void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m;
union ip_vs_sync_conn *s;
__u8 *p;
@@ -534,7 +522,7 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp)
/* Handle old version of the protocol */
if (sysctl_ip_vs_sync_ver == 0) {
- ip_vs_sync_conn_v0(cp);
+ ip_vs_sync_conn_v0(net, cp);
return;
}
/* Do not sync ONE PACKET */
@@ -551,7 +539,7 @@ sloop:
pe_name_len = strnlen(cp->pe->name, IP_VS_PENAME_MAXLEN);
}
- spin_lock(&curr_sb_lock);
+ spin_lock(&ipvs->sync_buff_lock);
#ifdef CONFIG_IP_VS_IPV6
if (cp->af == AF_INET6)
@@ -570,26 +558,27 @@ sloop:
/* check if there is a space for this one */
pad = 0;
- if (curr_sb) {
- pad = (4 - (size_t)curr_sb->head) & 3;
- if (curr_sb->head + len + pad > curr_sb->end) {
- sb_queue_tail(curr_sb);
- curr_sb = NULL;
+ if (ipvs->sync_buff) {
+ pad = (4 - (size_t)ipvs->sync_buff->head) & 3;
+ if (ipvs->sync_buff->head + len + pad > ipvs->sync_buff->end) {
+ sb_queue_tail(ipvs);
+ ipvs->sync_buff = NULL;
pad = 0;
}
}
- if (!curr_sb) {
- if (!(curr_sb=ip_vs_sync_buff_create())) {
- spin_unlock(&curr_sb_lock);
+ if (!ipvs->sync_buff) {
+ ipvs->sync_buff = ip_vs_sync_buff_create(ipvs);
+ if (!ipvs->sync_buff) {
+ spin_unlock(&ipvs->sync_buff_lock);
pr_err("ip_vs_sync_buff_create failed.\n");
return;
}
}
- m = curr_sb->mesg;
- p = curr_sb->head;
- curr_sb->head += pad + len;
+ m = ipvs->sync_buff->mesg;
+ p = ipvs->sync_buff->head;
+ ipvs->sync_buff->head += pad + len;
m->size += pad + len;
/* Add ev. padding from prev. sync_conn */
while (pad--)
@@ -647,7 +636,7 @@ sloop:
}
}
- spin_unlock(&curr_sb_lock);
+ spin_unlock(&ipvs->sync_buff_lock);
control:
/* synchronize its controller if it has */
@@ -699,7 +688,8 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
buff[pe_name_len]=0;
p->pe = __ip_vs_pe_getbyname(buff);
if (!p->pe) {
- IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n", buff);
+ IP_VS_DBG(3, "BACKUP, no %s engine found/loaded\n",
+ buff);
return 1;
}
} else {
@@ -748,7 +738,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
* If it is not found the connection will remain unbound
* but still handled.
*/
- dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr,
+ dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark);
/* Set the approprite ativity flag */
@@ -1089,6 +1079,7 @@ out:
static void ip_vs_process_message(struct net *net, __u8 *buffer,
const size_t buflen)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
int i, nr_conns;
@@ -1105,7 +1096,7 @@ static void ip_vs_process_message(struct net *net, __u8 *buffer,
return;
}
/* SyncID sanity check */
- if (ip_vs_backup_syncid != 0 && m2->syncid != ip_vs_backup_syncid) {
+ if (ipvs->backup_syncid != 0 && m2->syncid != ipvs->backup_syncid) {
IP_VS_DBG(7, "BACKUP, Ignoring syncid = %d\n", m2->syncid);
return;
}
@@ -1190,8 +1181,10 @@ static int set_mcast_if(struct sock *sk, char *ifname)
{
struct net_device *dev;
struct inet_sock *inet = inet_sk(sk);
+ struct net *net = sock_net(sk);
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
@@ -1210,30 +1203,33 @@ static int set_mcast_if(struct sock *sk, char *ifname)
* Set the maximum length of sync message according to the
* specified interface's MTU.
*/
-static int set_sync_mesg_maxlen(int sync_state)
+static int set_sync_mesg_maxlen(struct net *net, int sync_state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct net_device *dev;
int num;
if (sync_state == IP_VS_STATE_MASTER) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->master_mcast_ifn);
+ if (!dev)
return -ENODEV;
num = (dev->mtu - sizeof(struct iphdr) -
sizeof(struct udphdr) -
SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE;
- sync_send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
+ ipvs->send_mesg_maxlen = SYNC_MESG_HEADER_LEN +
SIMPLE_CONN_SIZE * min(num, MAX_CONNS_PER_SYNCBUFF);
IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
+ "message %d.\n", ipvs->send_mesg_maxlen);
} else if (sync_state == IP_VS_STATE_BACKUP) {
- if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
+ dev = __dev_get_by_name(net, ipvs->backup_mcast_ifn);
+ if (!dev)
return -ENODEV;
- sync_recv_mesg_maxlen = dev->mtu -
+ ipvs->recv_mesg_maxlen = dev->mtu -
sizeof(struct iphdr) - sizeof(struct udphdr);
IP_VS_DBG(7, "setting the maximum length of sync receiving "
- "message %d.\n", sync_recv_mesg_maxlen);
+ "message %d.\n", ipvs->recv_mesg_maxlen);
}
return 0;
@@ -1248,6 +1244,7 @@ static int set_sync_mesg_maxlen(int sync_state)
static int
join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
{
+ struct net *net = sock_net(sk);
struct ip_mreqn mreq;
struct net_device *dev;
int ret;
@@ -1255,7 +1252,8 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
memset(&mreq, 0, sizeof(mreq));
memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
return -EINVAL;
@@ -1272,11 +1270,13 @@ join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname)
static int bind_mcastif_addr(struct socket *sock, char *ifname)
{
+ struct net *net = sock_net(sock->sk);
struct net_device *dev;
__be32 addr;
struct sockaddr_in sin;
- if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
+ dev = __dev_get_by_name(net, ifname);
+ if (!dev)
return -ENODEV;
addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
@@ -1298,8 +1298,9 @@ static int bind_mcastif_addr(struct socket *sock, char *ifname)
/*
* Set up sending multicast socket over UDP
*/
-static struct socket * make_send_sock(void)
+static struct socket *make_send_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -1310,7 +1311,7 @@ static struct socket * make_send_sock(void)
return ERR_PTR(result);
}
- result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
+ result = set_mcast_if(sock->sk, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error setting outbound mcast interface\n");
goto error;
@@ -1319,7 +1320,7 @@ static struct socket * make_send_sock(void)
set_mcast_loop(sock->sk, 0);
set_mcast_ttl(sock->sk, 1);
- result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
+ result = bind_mcastif_addr(sock, ipvs->master_mcast_ifn);
if (result < 0) {
pr_err("Error binding address of the mcast interface\n");
goto error;
@@ -1343,8 +1344,9 @@ static struct socket * make_send_sock(void)
/*
* Set up receiving multicast socket over UDP
*/
-static struct socket * make_receive_sock(void)
+static struct socket *make_receive_sock(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct socket *sock;
int result;
@@ -1368,7 +1370,7 @@ static struct socket * make_receive_sock(void)
/* join the multicast group */
result = join_mcast_group(sock->sk,
(struct in_addr *) &mcast_addr.sin_addr,
- ip_vs_backup_mcast_ifn);
+ ipvs->backup_mcast_ifn);
if (result < 0) {
pr_err("Error joining to the multicast group\n");
goto error;
@@ -1439,20 +1441,21 @@ ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen)
static int sync_thread_master(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
struct ip_vs_sync_buff *sb;
pr_info("sync thread started: state = MASTER, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_master_mcast_ifn, ip_vs_master_syncid);
+ ipvs->master_mcast_ifn, ipvs->master_syncid);
while (!kthread_should_stop()) {
- while ((sb = sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs))) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
}
- /* check if entries stay in curr_sb for 2 seconds */
- sb = get_curr_sync_buff(2 * HZ);
+ /* check if entries stay in ipvs->sync_buff for 2 seconds */
+ sb = get_curr_sync_buff(ipvs, 2 * HZ);
if (sb) {
ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
ip_vs_sync_buff_release(sb);
@@ -1462,14 +1465,13 @@ static int sync_thread_master(void *data)
}
/* clean up the sync_buff queue */
- while ((sb=sb_dequeue())) {
+ while ((sb = sb_dequeue(ipvs)))
ip_vs_sync_buff_release(sb);
- }
/* clean up the current sync_buff */
- if ((sb = get_curr_sync_buff(0))) {
+ sb = get_curr_sync_buff(ipvs, 0);
+ if (sb)
ip_vs_sync_buff_release(sb);
- }
/* release the sending multicast socket */
sock_release(tinfo->sock);
@@ -1482,11 +1484,12 @@ static int sync_thread_master(void *data)
static int sync_thread_backup(void *data)
{
struct ip_vs_sync_thread_data *tinfo = data;
+ struct netns_ipvs *ipvs = net_ipvs(tinfo->net);
int len;
pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, "
"syncid = %d\n",
- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
+ ipvs->backup_mcast_ifn, ipvs->backup_syncid);
while (!kthread_should_stop()) {
wait_event_interruptible(*sk_sleep(tinfo->sock->sk),
@@ -1496,7 +1499,7 @@ static int sync_thread_backup(void *data)
/* do we have data now? */
while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
len = ip_vs_receive(tinfo->sock, tinfo->buf,
- sync_recv_mesg_maxlen);
+ ipvs->recv_mesg_maxlen);
if (len <= 0) {
pr_err("receiving message error\n");
break;
@@ -1505,7 +1508,7 @@ static int sync_thread_backup(void *data)
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(&init_net, tinfo->buf, len);
+ ip_vs_process_message(tinfo->net, tinfo->buf, len);
local_bh_enable();
}
}
@@ -1519,11 +1522,12 @@ static int sync_thread_backup(void *data)
}
-int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
+int start_sync_thread(struct net *net, int state, char *mcast_ifn, __u8 syncid)
{
struct ip_vs_sync_thread_data *tinfo;
struct task_struct **realtask, *task;
struct socket *sock;
+ struct netns_ipvs *ipvs = net_ipvs(net);
char *name, *buf = NULL;
int (*threadfn)(void *data);
int result = -ENOMEM;
@@ -1533,27 +1537,27 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
- if (sync_master_thread)
+ if (ipvs->master_thread)
return -EEXIST;
- strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_master_mcast_ifn));
- ip_vs_master_syncid = syncid;
- realtask = &sync_master_thread;
- name = "ipvs_syncmaster";
+ strlcpy(ipvs->master_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->master_mcast_ifn));
+ ipvs->master_syncid = syncid;
+ realtask = &ipvs->master_thread;
+ name = "ipvs_master:%d";
threadfn = sync_thread_master;
- sock = make_send_sock();
+ sock = make_send_sock(net);
} else if (state == IP_VS_STATE_BACKUP) {
- if (sync_backup_thread)
+ if (ipvs->backup_thread)
return -EEXIST;
- strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- sizeof(ip_vs_backup_mcast_ifn));
- ip_vs_backup_syncid = syncid;
- realtask = &sync_backup_thread;
- name = "ipvs_syncbackup";
+ strlcpy(ipvs->backup_mcast_ifn, mcast_ifn,
+ sizeof(ipvs->backup_mcast_ifn));
+ ipvs->backup_syncid = syncid;
+ realtask = &ipvs->backup_thread;
+ name = "ipvs_backup:%d";
threadfn = sync_thread_backup;
- sock = make_receive_sock();
+ sock = make_receive_sock(net);
} else {
return -EINVAL;
}
@@ -1563,9 +1567,9 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
goto out;
}
- set_sync_mesg_maxlen(state);
+ set_sync_mesg_maxlen(net, state);
if (state == IP_VS_STATE_BACKUP) {
- buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
+ buf = kmalloc(ipvs->recv_mesg_maxlen, GFP_KERNEL);
if (!buf)
goto outsocket;
}
@@ -1574,10 +1578,11 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
if (!tinfo)
goto outbuf;
+ tinfo->net = net;
tinfo->sock = sock;
tinfo->buf = buf;
- task = kthread_run(threadfn, tinfo, name);
+ task = kthread_run(threadfn, tinfo, name, ipvs->gen);
if (IS_ERR(task)) {
result = PTR_ERR(task);
goto outtinfo;
@@ -1585,7 +1590,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
/* mark as active */
*realtask = task;
- ip_vs_sync_state |= state;
+ ipvs->sync_state |= state;
/* increase the module use count */
ip_vs_use_count_inc();
@@ -1603,16 +1608,18 @@ out:
}
-int stop_sync_thread(int state)
+int stop_sync_thread(struct net *net, int state)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
if (state == IP_VS_STATE_MASTER) {
- if (!sync_master_thread)
+ if (!ipvs->master_thread)
return -ESRCH;
pr_info("stopping master sync thread %d ...\n",
- task_pid_nr(sync_master_thread));
+ task_pid_nr(ipvs->master_thread));
/*
* The lock synchronizes with sb_queue_tail(), so that we don't
@@ -1620,21 +1627,21 @@ int stop_sync_thread(int state)
* progress of stopping the master sync daemon.
*/
- spin_lock_bh(&ip_vs_sync_lock);
- ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
- spin_unlock_bh(&ip_vs_sync_lock);
- kthread_stop(sync_master_thread);
- sync_master_thread = NULL;
+ spin_lock_bh(&ipvs->sync_lock);
+ ipvs->sync_state &= ~IP_VS_STATE_MASTER;
+ spin_unlock_bh(&ipvs->sync_lock);
+ kthread_stop(ipvs->master_thread);
+ ipvs->master_thread = NULL;
} else if (state == IP_VS_STATE_BACKUP) {
- if (!sync_backup_thread)
+ if (!ipvs->backup_thread)
return -ESRCH;
pr_info("stopping backup sync thread %d ...\n",
- task_pid_nr(sync_backup_thread));
+ task_pid_nr(ipvs->backup_thread));
- ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
- kthread_stop(sync_backup_thread);
- sync_backup_thread = NULL;
+ ipvs->sync_state &= ~IP_VS_STATE_BACKUP;
+ kthread_stop(ipvs->backup_thread);
+ ipvs->backup_thread = NULL;
} else {
return -EINVAL;
}
@@ -1650,12 +1657,29 @@ int stop_sync_thread(int state)
*/
static int __net_init __ip_vs_sync_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) /* netns not enabled yet */
+ return -EPERM;
+
+ INIT_LIST_HEAD(&ipvs->sync_queue);
+ spin_lock_init(&ipvs->sync_lock);
+ spin_lock_init(&ipvs->sync_buff_lock);
+
+ ipvs->sync_mcast_addr.sin_family = AF_INET;
+ ipvs->sync_mcast_addr.sin_port = cpu_to_be16(IP_VS_SYNC_PORT);
+ ipvs->sync_mcast_addr.sin_addr.s_addr = cpu_to_be32(IP_VS_SYNC_GROUP);
return 0;
}
static void __ip_vs_sync_cleanup(struct net *net)
{
+ if (!net_eq(net, &init_net)) /* netns not enabled yet */
+ return;
+ stop_sync_thread(net, IP_VS_STATE_MASTER);
+ stop_sync_thread(net, IP_VS_STATE_BACKUP);
}
+
static struct pernet_operations ipvs_sync_ops = {
.init = __ip_vs_sync_init,
.exit = __ip_vs_sync_cleanup,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 45/79] IPVS: netns awareness to ip_vs_app
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
All variables moved to struct ipvs,
most external changes fixed (i.e. init_net removed)
in ip_vs_protocol param struct net *net added to:
- register_app()
- unregister_app()
This affected almost all proto_xxx.c files
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 12 +++---
include/net/netns/ip_vs.h | 5 ++
net/netfilter/ipvs/ip_vs_app.c | 73 +++++++++++++++++++-------------
net/netfilter/ipvs/ip_vs_ftp.c | 8 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 12 +++---
net/netfilter/ipvs/ip_vs_proto_tcp.c | 12 +++---
net/netfilter/ipvs/ip_vs_proto_udp.c | 12 +++---
7 files changed, 76 insertions(+), 58 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index cc6ae62..0cdd8ce 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -402,9 +402,9 @@ struct ip_vs_protocol {
const struct sk_buff *skb,
struct ip_vs_proto_data *pd);
- int (*register_app)(struct ip_vs_app *inc);
+ int (*register_app)(struct net *net, struct ip_vs_app *inc);
- void (*unregister_app)(struct ip_vs_app *inc);
+ void (*unregister_app)(struct net *net, struct ip_vs_app *inc);
int (*app_conn_bind)(struct ip_vs_conn *cp);
@@ -871,12 +871,12 @@ ip_vs_control_add(struct ip_vs_conn *cp, struct ip_vs_conn *ctl_cp)
* (from ip_vs_app.c)
*/
#define IP_VS_APP_MAX_PORTS 8
-extern int register_ip_vs_app(struct ip_vs_app *app);
-extern void unregister_ip_vs_app(struct ip_vs_app *app);
+extern int register_ip_vs_app(struct net *net, struct ip_vs_app *app);
+extern void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app);
extern int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp);
extern void ip_vs_unbind_app(struct ip_vs_conn *cp);
-extern int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port);
+extern int register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app,
+ __u16 proto, __u16 port);
extern int ip_vs_app_inc_get(struct ip_vs_app *inc);
extern void ip_vs_app_inc_put(struct ip_vs_app *inc);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 58bd3fd..03f7fe1 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -28,6 +28,11 @@ struct netns_ipvs {
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_app */
+ struct list_head app_list;
+ struct mutex app_mutex;
+ struct lock_class_key app_key; /* mutex debuging */
+
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
diff --git a/net/netfilter/ipvs/ip_vs_app.c b/net/netfilter/ipvs/ip_vs_app.c
index 40b09cc..286f465 100644
--- a/net/netfilter/ipvs/ip_vs_app.c
+++ b/net/netfilter/ipvs/ip_vs_app.c
@@ -43,11 +43,6 @@ EXPORT_SYMBOL(register_ip_vs_app);
EXPORT_SYMBOL(unregister_ip_vs_app);
EXPORT_SYMBOL(register_ip_vs_app_inc);
-/* ipvs application list head */
-static LIST_HEAD(ip_vs_app_list);
-static DEFINE_MUTEX(__ip_vs_app_mutex);
-
-
/*
* Get an ip_vs_app object
*/
@@ -67,7 +62,8 @@ static inline void ip_vs_app_put(struct ip_vs_app *app)
* Allocate/initialize app incarnation and register it in proto apps.
*/
static int
-ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
+ip_vs_app_inc_new(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
struct ip_vs_protocol *pp;
struct ip_vs_app *inc;
@@ -98,7 +94,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
}
}
- ret = pp->register_app(inc);
+ ret = pp->register_app(net, inc);
if (ret)
goto out;
@@ -119,7 +115,7 @@ ip_vs_app_inc_new(struct ip_vs_app *app, __u16 proto, __u16 port)
* Release app incarnation
*/
static void
-ip_vs_app_inc_release(struct ip_vs_app *inc)
+ip_vs_app_inc_release(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_protocol *pp;
@@ -127,7 +123,7 @@ ip_vs_app_inc_release(struct ip_vs_app *inc)
return;
if (pp->unregister_app)
- pp->unregister_app(inc);
+ pp->unregister_app(net, inc);
IP_VS_DBG(9, "%s App %s:%u unregistered\n",
pp->name, inc->name, ntohs(inc->port));
@@ -168,15 +164,17 @@ void ip_vs_app_inc_put(struct ip_vs_app *inc)
* Register an application incarnation in protocol applications
*/
int
-register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
+register_ip_vs_app_inc(struct net *net, struct ip_vs_app *app, __u16 proto,
+ __u16 port)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
int result;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- result = ip_vs_app_inc_new(app, proto, port);
+ result = ip_vs_app_inc_new(net, app, proto, port);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return result;
}
@@ -185,16 +183,17 @@ register_ip_vs_app_inc(struct ip_vs_app *app, __u16 proto, __u16 port)
/*
* ip_vs_app registration routine
*/
-int register_ip_vs_app(struct ip_vs_app *app)
+int register_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
/* increase the module use count */
ip_vs_use_count_inc();
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
- list_add(&app->a_list, &ip_vs_app_list);
+ list_add(&app->a_list, &ipvs->app_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
return 0;
}
@@ -204,19 +203,20 @@ int register_ip_vs_app(struct ip_vs_app *app)
* ip_vs_app unregistration routine
* We are sure there are no app incarnations attached to services
*/
-void unregister_ip_vs_app(struct ip_vs_app *app)
+void unregister_ip_vs_app(struct net *net, struct ip_vs_app *app)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_app *inc, *nxt;
- mutex_lock(&__ip_vs_app_mutex);
+ mutex_lock(&ipvs->app_mutex);
list_for_each_entry_safe(inc, nxt, &app->incs_list, a_list) {
- ip_vs_app_inc_release(inc);
+ ip_vs_app_inc_release(net, inc);
}
list_del(&app->a_list);
- mutex_unlock(&__ip_vs_app_mutex);
+ mutex_unlock(&ipvs->app_mutex);
/* decrease the module use count */
ip_vs_use_count_dec();
@@ -226,7 +226,8 @@ void unregister_ip_vs_app(struct ip_vs_app *app)
/*
* Bind ip_vs_conn to its ip_vs_app (called by cp constructor)
*/
-int ip_vs_bind_app(struct ip_vs_conn *cp, struct ip_vs_protocol *pp)
+int ip_vs_bind_app(struct ip_vs_conn *cp,
+ struct ip_vs_protocol *pp)
{
return pp->app_conn_bind(cp);
}
@@ -481,11 +482,11 @@ int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
* /proc/net/ip_vs_app entry function
*/
-static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
+static struct ip_vs_app *ip_vs_app_idx(struct netns_ipvs *ipvs, loff_t pos)
{
struct ip_vs_app *app, *inc;
- list_for_each_entry(app, &ip_vs_app_list, a_list) {
+ list_for_each_entry(app, &ipvs->app_list, a_list) {
list_for_each_entry(inc, &app->incs_list, a_list) {
if (pos-- == 0)
return inc;
@@ -497,19 +498,24 @@ static struct ip_vs_app *ip_vs_app_idx(loff_t pos)
static void *ip_vs_app_seq_start(struct seq_file *seq, loff_t *pos)
{
- mutex_lock(&__ip_vs_app_mutex);
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ mutex_lock(&ipvs->app_mutex);
- return *pos ? ip_vs_app_idx(*pos - 1) : SEQ_START_TOKEN;
+ return *pos ? ip_vs_app_idx(ipvs, *pos - 1) : SEQ_START_TOKEN;
}
static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct ip_vs_app *inc, *app;
struct list_head *e;
+ struct net *net = seq_file_net(seq);
+ struct netns_ipvs *ipvs = net_ipvs(net);
++*pos;
if (v == SEQ_START_TOKEN)
- return ip_vs_app_idx(0);
+ return ip_vs_app_idx(ipvs, 0);
inc = v;
app = inc->app;
@@ -518,7 +524,7 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
return list_entry(e, struct ip_vs_app, a_list);
/* go on to next application */
- for (e = app->a_list.next; e != &ip_vs_app_list; e = e->next) {
+ for (e = app->a_list.next; e != &ipvs->app_list; e = e->next) {
app = list_entry(e, struct ip_vs_app, a_list);
list_for_each_entry(inc, &app->incs_list, a_list) {
return inc;
@@ -529,7 +535,9 @@ static void *ip_vs_app_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void ip_vs_app_seq_stop(struct seq_file *seq, void *v)
{
- mutex_unlock(&__ip_vs_app_mutex);
+ struct netns_ipvs *ipvs = net_ipvs(seq_file_net(seq));
+
+ mutex_unlock(&ipvs->app_mutex);
}
static int ip_vs_app_seq_show(struct seq_file *seq, void *v)
@@ -557,7 +565,8 @@ static const struct seq_operations ip_vs_app_seq_ops = {
static int ip_vs_app_open(struct inode *inode, struct file *file)
{
- return seq_open(file, &ip_vs_app_seq_ops);
+ return seq_open_net(inode, file, &ip_vs_app_seq_ops,
+ sizeof(struct seq_net_private));
}
static const struct file_operations ip_vs_app_fops = {
@@ -571,9 +580,13 @@ static const struct file_operations ip_vs_app_fops = {
static int __net_init __ip_vs_app_init(struct net *net)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+ INIT_LIST_HEAD(&ipvs->app_list);
+ __mutex_init(&ipvs->app_mutex, "ipvs->app_mutex", &ipvs->app_key);
proc_net_fops_create(net, "ip_vs_app", 0, &ip_vs_app_fops);
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index b38ae94..77b0036 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -414,14 +414,14 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
- ret = register_ip_vs_app(app);
+ ret = register_ip_vs_app(net, app);
if (ret)
return ret;
for (i=0; i<IP_VS_APP_MAX_PORTS; i++) {
if (!ports[i])
continue;
- ret = register_ip_vs_app_inc(app, app->protocol, ports[i]);
+ ret = register_ip_vs_app_inc(net, app, app->protocol, ports[i]);
if (ret)
break;
pr_info("%s: loaded support on port[%d] = %d\n",
@@ -429,7 +429,7 @@ static int __net_init __ip_vs_ftp_init(struct net *net)
}
if (ret)
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
return ret;
}
@@ -443,7 +443,7 @@ static void __ip_vs_ftp_exit(struct net *net)
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return;
- unregister_ip_vs_app(app);
+ unregister_ip_vs_app(net, app);
}
static struct pernet_operations ip_vs_ftp_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 0f14f79..569e77b 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1016,14 +1016,14 @@ static inline __u16 sctp_app_hashkey(__be16 port)
& SCTP_APP_TAB_MASK;
}
-static int sctp_register_app(struct ip_vs_app *inc)
+static int sctp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
hash = sctp_app_hashkey(port);
@@ -1042,10 +1042,10 @@ out:
return ret;
}
-static void sctp_unregister_app(struct ip_vs_app *inc)
+static void sctp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
atomic_dec(&pd->appcnt);
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 290b380..757aaaf 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -577,14 +577,14 @@ static inline __u16 tcp_app_hashkey(__be16 port)
}
-static int tcp_register_app(struct ip_vs_app *inc)
+static int tcp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
@@ -605,10 +605,10 @@ static int tcp_register_app(struct ip_vs_app *inc)
static void
-tcp_unregister_app(struct ip_vs_app *inc)
+tcp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
spin_lock_bh(&ipvs->tcp_app_lock);
atomic_dec(&pd->appcnt);
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 3719837..1dc3941 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -353,14 +353,14 @@ static inline __u16 udp_app_hashkey(__be16 port)
}
-static int udp_register_app(struct ip_vs_app *inc)
+static int udp_register_app(struct net *net, struct ip_vs_app *inc)
{
struct ip_vs_app *i;
__u16 hash;
__be16 port = inc->port;
int ret = 0;
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
@@ -382,10 +382,10 @@ static int udp_register_app(struct ip_vs_app *inc)
static void
-udp_unregister_app(struct ip_vs_app *inc)
+udp_unregister_app(struct net *net, struct ip_vs_app *inc)
{
- struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
- struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(net);
spin_lock_bh(&ipvs->udp_app_lock);
atomic_dec(&pd->appcnt);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 44/79] IPVS: netns, common protocol changes and use of appcnt.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
appcnt and timeout_table moved from struct ip_vs_protocol to
ip_vs proto_data.
struct net *net added as first param to
- register_app()
- unregister_app()
- app_conn_bind()
- ip_vs_conn_new()
[horms@verge.net.au: removed cosmetic-change-only hunk]
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 -
net/netfilter/ipvs/ip_vs_conn.c | 6 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 4 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 5 +--
net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-
net/netfilter/ipvs/ip_vs_sync.c | 55 ++++++++++++++++++---------------
6 files changed, 39 insertions(+), 37 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 464ea36..cc6ae62 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -360,8 +360,6 @@ struct ip_vs_protocol {
u16 protocol;
u16 num_states;
int dont_defrag;
- atomic_t appcnt; /* counter of proto app incs */
- int *timeout_table; /* protocol timeout table */
void (*init)(struct ip_vs_protocol *pp);
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index a7aba6a..b2024c9 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -804,7 +804,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
struct ip_vs_dest *dest, __u32 fwmark)
{
struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, p->protocol);
cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
if (cp == NULL) {
@@ -863,8 +863,8 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
#endif
ip_vs_bind_xmit(cp);
- if (unlikely(pp && atomic_read(&pp->appcnt)))
- ip_vs_bind_app(cp, pp);
+ if (unlikely(pd && atomic_read(&pd->appcnt)))
+ ip_vs_bind_app(cp, pd->pp);
/*
* Allow conntrack to be preserved. By default, conntrack
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index 19bc379..0f14f79 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -1035,7 +1035,7 @@ static int sctp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->sctp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->sctp_app_lock);
@@ -1048,7 +1048,7 @@ static void sctp_unregister_app(struct ip_vs_app *inc)
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_SCTP);
spin_lock_bh(&ipvs->sctp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->sctp_app_lock);
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d7c2455..290b380 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -596,7 +596,7 @@ static int tcp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->tcp_app_lock);
@@ -611,7 +611,7 @@ tcp_unregister_app(struct ip_vs_app *inc)
struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
spin_lock_bh(&ipvs->tcp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->tcp_app_lock);
}
@@ -701,7 +701,6 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.protocol = IPPROTO_TCP,
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
.init = NULL,
.exit = NULL,
.init_netns = __ip_vs_tcp_init,
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index aa85df2..3719837 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -373,7 +373,7 @@ static int udp_register_app(struct ip_vs_app *inc)
}
}
list_add(&inc->p_list, &ipvs->udp_apps[hash]);
- atomic_inc(&pd->pp->appcnt);
+ atomic_inc(&pd->appcnt);
out:
spin_unlock_bh(&ipvs->udp_app_lock);
@@ -388,7 +388,7 @@ udp_unregister_app(struct ip_vs_app *inc)
struct netns_ipvs *ipvs = net_ipvs(&init_net);
spin_lock_bh(&ipvs->udp_app_lock);
- atomic_dec(&pd->pp->appcnt);
+ atomic_dec(&pd->appcnt);
list_del(&inc->p_list);
spin_unlock_bh(&ipvs->udp_app_lock);
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 662aa2c..6831e8f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -725,17 +725,16 @@ ip_vs_conn_fill_param_sync(int af, union ip_vs_sync_conn *sc,
* Param: ...
* timeout is in sec.
*/
-static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
- unsigned state, unsigned protocol, unsigned type,
+static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
+ unsigned int flags, unsigned int state,
+ unsigned int protocol, unsigned int type,
const union nf_inet_addr *daddr, __be16 dport,
unsigned long timeout, __u32 fwmark,
- struct ip_vs_sync_conn_options *opt,
- struct ip_vs_protocol *pp)
+ struct ip_vs_sync_conn_options *opt)
{
struct ip_vs_dest *dest;
struct ip_vs_conn *cp;
-
if (!(flags & IP_VS_CONN_F_TEMPLATE))
cp = ip_vs_conn_in_get(param);
else
@@ -821,17 +820,23 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
if (timeout > MAX_SCHEDULE_TIMEOUT / HZ)
timeout = MAX_SCHEDULE_TIMEOUT / HZ;
cp->timeout = timeout*HZ;
- } else if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- cp->timeout = pp->timeout_table[state];
- else
- cp->timeout = (3*60*HZ);
+ } else {
+ struct ip_vs_proto_data *pd;
+
+ pd = ip_vs_proto_data_get(net, protocol);
+ if (!(flags & IP_VS_CONN_F_TEMPLATE) && pd && pd->timeout_table)
+ cp->timeout = pd->timeout_table[state];
+ else
+ cp->timeout = (3*60*HZ);
+ }
ip_vs_conn_put(cp);
}
/*
* Process received multicast message for Version 0
*/
-static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
+static void ip_vs_process_message_v0(struct net *net, const char *buffer,
+ const size_t buflen)
{
struct ip_vs_sync_mesg_v0 *m = (struct ip_vs_sync_mesg_v0 *)buffer;
struct ip_vs_sync_conn_v0 *s;
@@ -879,7 +884,6 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
}
} else {
/* protocol in templates is not used for state/timeout */
- pp = NULL;
if (state > 0) {
IP_VS_DBG(2, "BACKUP v0, Invalid template state %u\n",
state);
@@ -894,9 +898,9 @@ static void ip_vs_process_message_v0(const char *buffer, const size_t buflen)
s->vport, ¶m);
/* Send timeout as Zero */
- ip_vs_proc_conn(¶m, flags, state, s->protocol, AF_INET,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->protocol, AF_INET,
(union nf_inet_addr *)&s->daddr, s->dport,
- 0, 0, opt, pp);
+ 0, 0, opt);
}
}
@@ -945,7 +949,7 @@ static int ip_vs_proc_str(__u8 *p, unsigned int plen, unsigned int *data_len,
/*
* Process a Version 1 sync. connection
*/
-static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
+static inline int ip_vs_proc_sync_conn(struct net *net, __u8 *p, __u8 *msg_end)
{
struct ip_vs_sync_conn_options opt;
union ip_vs_sync_conn *s;
@@ -1043,7 +1047,6 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
}
} else {
/* protocol in templates is not used for state/timeout */
- pp = NULL;
if (state > 0) {
IP_VS_DBG(3, "BACKUP, Invalid template state %u\n",
state);
@@ -1058,18 +1061,18 @@ static inline int ip_vs_proc_sync_conn(__u8 *p, __u8 *msg_end)
}
/* If only IPv4, just silent skip IPv6 */
if (af == AF_INET)
- ip_vs_proc_conn(¶m, flags, state, s->v4.protocol, af,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v4.protocol, af,
(union nf_inet_addr *)&s->v4.daddr, s->v4.dport,
ntohl(s->v4.timeout), ntohl(s->v4.fwmark),
- (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
- pp);
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
#ifdef CONFIG_IP_VS_IPV6
else
- ip_vs_proc_conn(¶m, flags, state, s->v6.protocol, af,
+ ip_vs_proc_conn(net, ¶m, flags, state, s->v6.protocol, af,
(union nf_inet_addr *)&s->v6.daddr, s->v6.dport,
ntohl(s->v6.timeout), ntohl(s->v6.fwmark),
- (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL),
- pp);
+ (opt_flags & IPVS_OPT_F_SEQ_DATA ? &opt : NULL)
+ );
#endif
return 0;
/* Error exit */
@@ -1083,7 +1086,8 @@ out:
* ip_vs_conn entries.
* Handles Version 0 & 1
*/
-static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
+static void ip_vs_process_message(struct net *net, __u8 *buffer,
+ const size_t buflen)
{
struct ip_vs_sync_mesg *m2 = (struct ip_vs_sync_mesg *)buffer;
__u8 *p, *msg_end;
@@ -1136,7 +1140,8 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
return;
}
/* Process a single sync_conn */
- if ((retc=ip_vs_proc_sync_conn(p, msg_end)) < 0) {
+ retc = ip_vs_proc_sync_conn(net, p, msg_end);
+ if (retc < 0) {
IP_VS_ERR_RL("BACKUP, Dropping buffer, Err: %d in decoding\n",
retc);
return;
@@ -1146,7 +1151,7 @@ static void ip_vs_process_message(__u8 *buffer, const size_t buflen)
}
} else {
/* Old type of message */
- ip_vs_process_message_v0(buffer, buflen);
+ ip_vs_process_message_v0(net, buffer, buflen);
return;
}
}
@@ -1500,7 +1505,7 @@ static int sync_thread_backup(void *data)
/* disable bottom half, because it accesses the data
shared by softirq while getting/creating conns */
local_bh_disable();
- ip_vs_process_message(tinfo->buf, len);
+ ip_vs_process_message(&init_net, tinfo->buf, len);
local_bh_enable();
}
}
--
1.7.2.3
^ permalink raw reply related
* [PATCH 43/79] IPVS: netns, use ip_vs_proto_data as param.
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
ip_vs_protocol *pp is replaced by ip_vs_proto_data *pd in
function call in ip_vs_protocol struct i.e. :,
- timeout_change()
- state_transition()
ip_vs_protocol_timeout_change() got ipvs as param, due to above
and a upcoming patch - defence work
Most of this changes are triggered by Julians comment:
"tcp_timeout_change should work with the new struct ip_vs_proto_data
so that tcp_state_table will go to pd->state_table
and set_tcp_state will get pd instead of pp"
*v3
Mostly comments from Julian
The pp -> pd conversion should start from functions like
ip_vs_out() that use pp = ip_vs_proto_get(iph.protocol),
now they should use ip_vs_proto_data_get(net, iph.protocol).
conn_in_get() and conn_out_get() unused param *pp, removed.
*v4
ip_vs_protocol_timeout_change() walk the proto_data path.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 18 ++-----
net/netfilter/ipvs/ip_vs_conn.c | 2 -
net/netfilter/ipvs/ip_vs_core.c | 77 +++++++++++++++++++------------
net/netfilter/ipvs/ip_vs_ctl.c | 55 ++++++++++++++--------
net/netfilter/ipvs/ip_vs_proto.c | 21 ++++++---
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 10 ++--
net/netfilter/ipvs/ip_vs_proto_sctp.c | 16 +++----
net/netfilter/ipvs/ip_vs_proto_tcp.c | 27 +++++------
net/netfilter/ipvs/ip_vs_proto_udp.c | 11 ++---
net/netfilter/xt_ipvs.c | 2 +-
10 files changed, 129 insertions(+), 110 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3c45a00..464ea36 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -372,13 +372,12 @@ struct ip_vs_protocol {
void (*exit_netns)(struct net *net, struct ip_vs_proto_data *pd);
int (*conn_schedule)(int af, struct sk_buff *skb,
- struct ip_vs_protocol *pp,
+ struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp);
struct ip_vs_conn *
(*conn_in_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -386,7 +385,6 @@ struct ip_vs_protocol {
struct ip_vs_conn *
(*conn_out_get)(int af,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -404,7 +402,7 @@ struct ip_vs_protocol {
int (*state_transition)(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
int (*register_app)(struct ip_vs_app *inc);
@@ -417,9 +415,7 @@ struct ip_vs_protocol {
int offset,
const char *msg);
- void (*timeout_change)(struct ip_vs_protocol *pp, int flags);
-
- int (*set_state_timeout)(struct ip_vs_protocol *pp, char *sname, int to);
+ void (*timeout_change)(struct ip_vs_proto_data *pd, int flags);
};
/*
@@ -778,7 +774,6 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -786,7 +781,6 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p);
struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse);
@@ -917,7 +911,7 @@ static inline void ip_vs_pe_put(const struct ip_vs_pe *pe)
*/
extern int ip_vs_protocol_init(void);
extern void ip_vs_protocol_cleanup(void);
-extern void ip_vs_protocol_timeout_change(int flags);
+extern void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags);
extern int *ip_vs_create_timeout_table(int *table, int size);
extern int
ip_vs_set_state_timeout(int *table, int num, const char *const *names,
@@ -947,9 +941,9 @@ extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name);
extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler);
extern struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored);
+ struct ip_vs_proto_data *pd, int *ignored);
extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp);
+ struct ip_vs_proto_data *pd);
/*
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 7a0e79e..a7aba6a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -329,7 +329,6 @@ ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb,
struct ip_vs_conn *
ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
@@ -428,7 +427,6 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p)
struct ip_vs_conn *
ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off, int inverse)
{
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index d0616ea..9317aff 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -177,11 +177,11 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
static inline int
ip_vs_set_state(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- if (unlikely(!pp->state_transition))
+ if (unlikely(!pd->pp->state_transition))
return 0;
- return pp->state_transition(cp, direction, skb, pp);
+ return pd->pp->state_transition(cp, direction, skb, pd);
}
static inline int
@@ -378,8 +378,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
*/
struct ip_vs_conn *
ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp, int *ignored)
+ struct ip_vs_proto_data *pd, int *ignored)
{
+ struct ip_vs_protocol *pp = pd->pp;
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_dest *dest;
@@ -408,7 +409,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* Do not schedule replies from local real server.
*/
if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) &&
- (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) {
+ (cp = pp->conn_in_get(svc->af, skb, &iph, iph.len, 1))) {
IP_VS_DBG_PKT(12, svc->af, pp, skb, 0,
"Not scheduling reply for existing connection");
__ip_vs_conn_put(cp);
@@ -479,11 +480,12 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
* no destination is available for a new connection.
*/
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
int unicast;
+
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports);
@@ -530,10 +532,10 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_in_stats(cp, skb);
/* set state */
- cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ cs = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
/* transmit the first SYN packet */
- ret = cp->packet_xmit(skb, cp, pp);
+ ret = cp->packet_xmit(skb, cp, pd->pp);
/* do not touch skb anymore */
atomic_inc(&cp->in_pkts);
@@ -840,7 +842,7 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -917,7 +919,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp)
return NF_ACCEPT;
@@ -956,9 +958,11 @@ static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
* Used for NAT and local client.
*/
static unsigned int
-handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
+ struct ip_vs_protocol *pp = pd->pp;
+
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
if (!skb_make_writable(skb, ihl))
@@ -1007,7 +1011,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
- ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp);
+ ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pd);
skb->ipvs_property = 1;
if (!(cp->flags & IP_VS_CONN_F_NFCT))
ip_vs_notrack(skb);
@@ -1034,6 +1038,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
EnterFunction(11);
@@ -1079,9 +1084,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
+ pp = pd->pp;
/* reassemble IP fragments */
#ifdef CONFIG_IP_VS_IPV6
@@ -1107,10 +1113,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
if (likely(cp))
- return handle_response(af, skb, pp, cp, iph.len);
+ return handle_response(af, skb, pd, cp, iph.len);
if (sysctl_ip_vs_nat_icmp_send &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
@@ -1236,12 +1242,14 @@ ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb,
static int
ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct iphdr *iph;
struct icmphdr _icmph, *ic;
struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, ihl, verdict;
union nf_inet_addr snet;
@@ -1283,9 +1291,11 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->protocol);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->protocol);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
@@ -1299,10 +1309,10 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET, skb, &ciph, offset, 1);
if (cp) {
snet.ip = iph->saddr;
return handle_response_icmp(AF_INET, skb, &snet,
@@ -1346,6 +1356,7 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
static int
ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
{
+ struct net *net = NULL;
struct ipv6hdr *iph;
struct icmp6hdr _icmph, *ic;
struct ipv6hdr _ciph, *cih; /* The ip header contained
@@ -1353,6 +1364,7 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
struct ip_vs_iphdr ciph;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
unsigned int offset, verdict;
union nf_inet_addr snet;
struct rt6_info *rt;
@@ -1395,9 +1407,11 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
if (cih == NULL)
return NF_ACCEPT; /* The packet looks wrong, ignore */
- pp = ip_vs_proto_get(cih->nexthdr);
- if (!pp)
+ net = skb_net(skb);
+ pd = ip_vs_proto_data_get(net, cih->nexthdr);
+ if (!pd)
return NF_ACCEPT;
+ pp = pd->pp;
/* Is the embedded protocol header present? */
/* TODO: we don't support fragmentation at the moment anyways */
@@ -1411,10 +1425,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
ip_vs_fill_iphdr(AF_INET6, cih, &ciph);
/* The embedded headers contain source and dest in reverse order */
- cp = pp->conn_in_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_in_get(AF_INET6, skb, &ciph, offset, 1);
if (!cp) {
/* The packet could also belong to a local client */
- cp = pp->conn_out_get(AF_INET6, skb, pp, &ciph, offset, 1);
+ cp = pp->conn_out_get(AF_INET6, skb, &ciph, offset, 1);
if (cp) {
ipv6_addr_copy(&snet.in6, &iph->saddr);
return handle_response_icmp(AF_INET6, skb, &snet,
@@ -1457,8 +1471,10 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
static unsigned int
ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
{
+ struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
int ret, restart, pkts;
@@ -1514,20 +1530,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
}
+ net = skb_net(skb);
/* Protocol supported? */
- pp = ip_vs_proto_get(iph.protocol);
- if (unlikely(!pp))
+ pd = ip_vs_proto_data_get(net, iph.protocol);
+ if (unlikely(!pd))
return NF_ACCEPT;
-
+ pp = pd->pp;
/*
* Check if the packet belongs to an existing connection entry
*/
- cp = pp->conn_in_get(af, skb, pp, &iph, iph.len, 0);
+ cp = pp->conn_in_get(af, skb, &iph, iph.len, 0);
if (unlikely(!cp)) {
int v;
- if (!pp->conn_schedule(af, skb, pp, &v, &cp))
+ if (!pp->conn_schedule(af, skb, pd, &v, &cp))
return v;
}
@@ -1555,7 +1572,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
}
ip_vs_in_stats(cp, skb);
- restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pp);
+ restart = ip_vs_set_state(cp, IP_VS_DIR_INPUT, skb, pd);
if (cp->packet_xmit)
ret = cp->packet_xmit(skb, cp, pp);
/* do not touch skb anymore */
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 2d7c96b..88474f1 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -38,6 +38,7 @@
#include <linux/mutex.h>
#include <net/net_namespace.h>
+#include <linux/nsproxy.h>
#include <net/ip.h>
#ifdef CONFIG_IP_VS_IPV6
#include <net/ipv6.h>
@@ -125,7 +126,7 @@ static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
*/
-static void update_defense_level(void)
+static void update_defense_level(struct netns_ipvs *ipvs)
{
struct sysinfo i;
static int old_secure_tcp = 0;
@@ -239,7 +240,8 @@ static void update_defense_level(void)
}
old_secure_tcp = sysctl_ip_vs_secure_tcp;
if (to_change >= 0)
- ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
+ ip_vs_protocol_timeout_change(ipvs,
+ sysctl_ip_vs_secure_tcp > 1);
spin_unlock(&ip_vs_securetcp_lock);
local_bh_enable();
@@ -255,7 +257,10 @@ static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
static void defense_work_handler(struct work_struct *work)
{
- update_defense_level();
+ struct net *net = &init_net;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ update_defense_level(ipvs);
if (atomic_read(&ip_vs_dropentry))
ip_vs_random_dropentry();
@@ -1502,6 +1507,7 @@ static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
{
+ struct net *net = current->nsproxy->net_ns;
int *valp = table->data;
int val = *valp;
int rc;
@@ -1512,7 +1518,7 @@ proc_do_defense_mode(ctl_table *table, int write,
/* Restore the correct value */
*valp = val;
} else {
- update_defense_level();
+ update_defense_level(net_ipvs(net));
}
}
return rc;
@@ -2033,8 +2039,10 @@ static const struct file_operations ip_vs_stats_fops = {
/*
* Set timeout values for tcp tcpfin udp in the timeout_table.
*/
-static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
+static int ip_vs_set_timeout(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
u->tcp_timeout,
u->tcp_fin_timeout,
@@ -2042,19 +2050,22 @@ static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
#ifdef CONFIG_IP_VS_PROTO_TCP
if (u->tcp_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]
= u->tcp_timeout * HZ;
}
if (u->tcp_fin_timeout) {
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ pd->timeout_table[IP_VS_TCP_S_FIN_WAIT]
= u->tcp_fin_timeout * HZ;
}
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
if (u->udp_timeout) {
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
+ pd->timeout_table[IP_VS_UDP_S_NORMAL]
= u->udp_timeout * HZ;
}
#endif
@@ -2158,7 +2169,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
- ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
+ ret = ip_vs_set_timeout(net, (struct ip_vs_timeout_user *)arg);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
@@ -2370,17 +2381,19 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
}
static inline void
-__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
+__ip_vs_get_timeouts(struct net *net, struct ip_vs_timeout_user *u)
{
+ struct ip_vs_proto_data *pd;
+
#ifdef CONFIG_IP_VS_PROTO_TCP
- u->tcp_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
- u->tcp_fin_timeout =
- ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
+ pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+ u->tcp_timeout = pd->timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
+ u->tcp_fin_timeout = pd->timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
#endif
#ifdef CONFIG_IP_VS_PROTO_UDP
+ pd = ip_vs_proto_data_get(net, IPPROTO_UDP);
u->udp_timeout =
- ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
+ pd->timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
#endif
}
@@ -2521,7 +2534,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (copy_to_user(user, &t, sizeof(t)) != 0)
ret = -EFAULT;
}
@@ -3092,11 +3105,11 @@ static int ip_vs_genl_del_daemon(struct nlattr **attrs)
return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
}
-static int ip_vs_genl_set_config(struct nlattr **attrs)
+static int ip_vs_genl_set_config(struct net *net, struct nlattr **attrs)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
@@ -3108,7 +3121,7 @@ static int ip_vs_genl_set_config(struct nlattr **attrs)
if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
- return ip_vs_set_timeout(&t);
+ return ip_vs_set_timeout(net, &t);
}
static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
@@ -3129,7 +3142,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
ret = ip_vs_flush(net);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
- ret = ip_vs_genl_set_config(info->attrs);
+ ret = ip_vs_genl_set_config(net, info->attrs);
goto out;
} else if (cmd == IPVS_CMD_NEW_DAEMON ||
cmd == IPVS_CMD_DEL_DAEMON) {
@@ -3281,7 +3294,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_timeout_user t;
- __ip_vs_get_timeouts(&t);
+ __ip_vs_get_timeouts(net, &t);
#ifdef CONFIG_IP_VS_PROTO_TCP
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 9f609d4..6ac986c 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -152,9 +152,8 @@ EXPORT_SYMBOL(ip_vs_proto_get);
* get ip_vs_protocol object data by netns and proto
*/
struct ip_vs_proto_data *
-ip_vs_proto_data_get(struct net *net, unsigned short proto)
+__ipvs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto)
{
- struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_proto_data *pd;
unsigned hash = IP_VS_PROTO_HASH(proto);
@@ -165,20 +164,28 @@ ip_vs_proto_data_get(struct net *net, unsigned short proto)
return NULL;
}
+
+struct ip_vs_proto_data *
+ip_vs_proto_data_get(struct net *net, unsigned short proto)
+{
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ return __ipvs_proto_data_get(ipvs, proto);
+}
EXPORT_SYMBOL(ip_vs_proto_data_get);
/*
* Propagate event for state change to all protocols
*/
-void ip_vs_protocol_timeout_change(int flags)
+void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags)
{
- struct ip_vs_protocol *pp;
+ struct ip_vs_proto_data *pd;
int i;
for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
- for (pp = ip_vs_proto_table[i]; pp; pp = pp->next) {
- if (pp->timeout_change)
- pp->timeout_change(pp, flags);
+ for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) {
+ if (pd->pp->timeout_change)
+ pd->pp->timeout_change(pd, flags);
}
}
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index b8b37fa..28039cb 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -55,7 +55,7 @@ ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph,
}
static struct ip_vs_conn *
-ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_in_get(int af, const struct sk_buff *skb,
const struct ip_vs_iphdr *iph, unsigned int proto_off,
int inverse)
{
@@ -72,7 +72,7 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for outin packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -83,7 +83,6 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp,
static struct ip_vs_conn *
ah_esp_conn_out_get(int af, const struct sk_buff *skb,
- struct ip_vs_protocol *pp,
const struct ip_vs_iphdr *iph,
unsigned int proto_off,
int inverse)
@@ -97,7 +96,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet "
"%s%s %s->%s\n",
inverse ? "ICMP+" : "",
- pp->name,
+ ip_vs_proto_get(iph->protocol)->name,
IP_VS_DBG_ADDR(af, &iph->saddr),
IP_VS_DBG_ADDR(af, &iph->daddr));
}
@@ -107,7 +106,7 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb,
static int
-ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
/*
@@ -137,7 +136,6 @@ struct ip_vs_protocol ip_vs_protocol_ah = {
.app_conn_bind = NULL,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL, /* ISAKMP */
- .set_state_timeout = NULL,
};
#endif
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index f826dd1..19bc379 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -9,7 +9,7 @@
#include <net/ip_vs.h>
static int
-sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -47,10 +47,10 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -907,14 +907,13 @@ static const char *sctp_state_name(int state)
}
static inline int
-set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_sctp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, const struct sk_buff *skb)
{
sctp_chunkhdr_t _sctpch, *sch;
unsigned char chunk_type;
int event, next_state;
int ihl;
- struct ip_vs_proto_data *pd;
#ifdef CONFIG_IP_VS_IPV6
ihl = cp->af == AF_INET ? ip_hdrlen(skb) : sizeof(struct ipv6hdr);
@@ -966,7 +965,7 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
IP_VS_DBG_BUF(8, "%s %s %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((direction == IP_VS_DIR_OUTPUT) ?
"output " : "input "),
IP_VS_DBG_ADDR(cp->af, &cp->daddr),
@@ -990,7 +989,6 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
}
- pd = ip_vs_proto_data_get(&init_net, pp->protocol); /* tmp fix */
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = next_state];
else /* What to do ? */
@@ -1001,12 +999,12 @@ set_sctp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
static int
sctp_state_transition(struct ip_vs_conn *cp, int direction,
- const struct sk_buff *skb, struct ip_vs_protocol *pp)
+ const struct sk_buff *skb, struct ip_vs_proto_data *pd)
{
int ret = 0;
spin_lock(&cp->lock);
- ret = set_sctp_state(pp, cp, direction, skb);
+ ret = set_sctp_state(pd, cp, direction, skb);
spin_unlock(&cp->lock);
return ret;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 9d9df3d..d7c2455 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -32,7 +32,7 @@
#include <net/ip_vs.h>
static int
-tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -68,10 +68,10 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -448,10 +448,7 @@ static struct tcp_states_t tcp_states_dos [] = {
/*rst*/ {{sCL, sCL, sCL, sSR, sCL, sCL, sCL, sCL, sLA, sLI, sCL }},
};
-static struct tcp_states_t *tcp_state_table = tcp_states;
-
-
-static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
+static void tcp_timeout_change(struct ip_vs_proto_data *pd, int flags)
{
int on = (flags & 1); /* secure_tcp */
@@ -461,7 +458,7 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
** for most if not for all of the applications. Something
** like "capabilities" (flags) for each object.
*/
- tcp_state_table = (on? tcp_states_dos : tcp_states);
+ pd->tcp_state_table = (on ? tcp_states_dos : tcp_states);
}
static inline int tcp_state_idx(struct tcphdr *th)
@@ -478,13 +475,12 @@ static inline int tcp_state_idx(struct tcphdr *th)
}
static inline void
-set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
+set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
int direction, struct tcphdr *th)
{
int state_idx;
int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction];
- struct ip_vs_proto_data *pd; /* Temp fix */
/*
* Update state offset to INPUT_ONLY if necessary
@@ -502,7 +498,8 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
goto tcp_state_out;
}
- new_state = tcp_state_table[state_off+state_idx].next_state[cp->state];
+ new_state =
+ pd->tcp_state_table[state_off+state_idx].next_state[cp->state];
tcp_state_out:
if (new_state != cp->state) {
@@ -510,7 +507,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
IP_VS_DBG_BUF(8, "%s %s [%c%c%c%c] %s:%d->"
"%s:%d state: %s->%s conn->refcnt:%d\n",
- pp->name,
+ pd->pp->name,
((state_off == TCP_DIR_OUTPUT) ?
"output " : "input "),
th->syn ? 'S' : '.',
@@ -540,7 +537,6 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
- pd = ip_vs_proto_data_get(&init_net, pp->protocol);
if (likely(pd))
cp->timeout = pd->timeout_table[cp->state = new_state];
else /* What to do ? */
@@ -553,7 +549,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
static int
tcp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
struct tcphdr _tcph, *th;
@@ -568,7 +564,7 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
return 0;
spin_lock(&cp->lock);
- set_tcp_state(pp, cp, direction, th);
+ set_tcp_state(pd, cp, direction, th);
spin_unlock(&cp->lock);
return 1;
@@ -691,6 +687,7 @@ static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
spin_lock_init(&ipvs->tcp_app_lock);
pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
sizeof(tcp_timeouts));
+ pd->tcp_state_table = tcp_states;
}
static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 71a4721..aa85df2 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -29,7 +29,7 @@
#include <net/ip6_checksum.h>
static int
-udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
+udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
int *verdict, struct ip_vs_conn **cpp)
{
struct net *net;
@@ -64,10 +64,10 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
* Let the virtual server select a real server for the
* incoming connection, and create a connection entry.
*/
- *cpp = ip_vs_schedule(svc, skb, pp, &ignored);
+ *cpp = ip_vs_schedule(svc, skb, pd, &ignored);
if (!*cpp && ignored <= 0) {
if (!ignored)
- *verdict = ip_vs_leave(svc, skb, pp);
+ *verdict = ip_vs_leave(svc, skb, pd);
else {
ip_vs_service_put(svc);
*verdict = NF_DROP;
@@ -457,11 +457,8 @@ static const char * udp_state_name(int state)
static int
udp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
- struct ip_vs_protocol *pp)
+ struct ip_vs_proto_data *pd)
{
- struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */
-
- pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
if (unlikely(!pd)) {
pr_err("UDP no ns data\n");
return 0;
diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c
index 9127a3d..bb10b07 100644
--- a/net/netfilter/xt_ipvs.c
+++ b/net/netfilter/xt_ipvs.c
@@ -85,7 +85,7 @@ ipvs_mt(const struct sk_buff *skb, struct xt_action_param *par)
/*
* Check if the packet belongs to an existing entry
*/
- cp = pp->conn_out_get(family, skb, pp, &iph, iph.len, 1 /* inverse */);
+ cp = pp->conn_out_get(family, skb, &iph, iph.len, 1 /* inverse */);
if (unlikely(cp == NULL)) {
match = false;
goto out;
--
1.7.2.3
^ permalink raw reply related
* [PATCH 42/79] IPVS: netns preparation for proto_ah_esp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that common for all protos.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_proto.c | 6 ++++++
net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 20 ++++----------------
2 files changed, 10 insertions(+), 16 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 001b2f8..9f609d4 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -316,6 +316,12 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
#ifdef CONFIG_IP_VS_PROTO_SCTP
register_ip_vs_proto_netns(net, &ip_vs_protocol_sctp);
#endif
+#ifdef CONFIG_IP_VS_PROTO_AH
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_ah);
+#endif
+#ifdef CONFIG_IP_VS_PROTO_ESP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_esp);
+#endif
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
index 3a04611..b8b37fa 100644
--- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c
@@ -117,26 +117,14 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
return 0;
}
-static void ah_esp_init(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
-static void ah_esp_exit(struct ip_vs_protocol *pp)
-{
- /* nothing to do now */
-}
-
-
#ifdef CONFIG_IP_VS_PROTO_AH
struct ip_vs_protocol ip_vs_protocol_ah = {
.name = "AH",
.protocol = IPPROTO_AH,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
@@ -159,8 +147,8 @@ struct ip_vs_protocol ip_vs_protocol_esp = {
.protocol = IPPROTO_ESP,
.num_states = 1,
.dont_defrag = 1,
- .init = ah_esp_init,
- .exit = ah_esp_exit,
+ .init = NULL,
+ .exit = NULL,
.conn_schedule = ah_esp_conn_schedule,
.conn_in_get = ah_esp_conn_in_get,
.conn_out_get = ah_esp_conn_out_get,
--
1.7.2.3
^ permalink raw reply related
* [PATCH 40/79] IPVS: netns preparation for proto_udp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that is common for all protos and use ip_vs_proto_data
*v3
Removed unused function set_state_timeout()
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 8 +++
net/netfilter/ipvs/ip_vs_proto.c | 3 +
net/netfilter/ipvs/ip_vs_proto_udp.c | 86 +++++++++++++++++-----------------
3 files changed, 54 insertions(+), 43 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index ac77363..62b1448 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -39,6 +39,14 @@ struct netns_ipvs {
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
+ /* ip_vs_proto_udp */
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ #define UDP_APP_TAB_BITS 4
+ #define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
+ #define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
+ struct list_head udp_apps[UDP_APP_TAB_SIZE];
+ spinlock_t udp_app_lock;
+#endif
/* ip_vs_lblc */
int sysctl_lblc_expiration;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 320c6a6..cdc4142 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -310,6 +310,9 @@ static int __net_init __ip_vs_protocol_init(struct net *net)
#ifdef CONFIG_IP_VS_PROTO_TCP
register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
#endif
+#ifdef CONFIG_IP_VS_PROTO_UDP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_udp);
+#endif
return 0;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index 5ab54f6..71a4721 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -9,7 +9,8 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
+ * Network name space (netns) aware.
*
*/
@@ -345,19 +346,6 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp)
return 1;
}
-
-/*
- * Note: the caller guarantees that only one of register_app,
- * unregister_app or app_conn_bind is called each time.
- */
-
-#define UDP_APP_TAB_BITS 4
-#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
-#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
-
-static struct list_head udp_apps[UDP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(udp_app_lock);
-
static inline __u16 udp_app_hashkey(__be16 port)
{
return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port)
@@ -371,22 +359,24 @@ static int udp_register_app(struct ip_vs_app *inc)
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
hash = udp_app_hashkey(port);
- spin_lock_bh(&udp_app_lock);
- list_for_each_entry(i, &udp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->udp_app_lock);
+ list_for_each_entry(i, &ipvs->udp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &udp_apps[hash]);
- atomic_inc(&ip_vs_protocol_udp.appcnt);
+ list_add(&inc->p_list, &ipvs->udp_apps[hash]);
+ atomic_inc(&pd->pp->appcnt);
out:
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
return ret;
}
@@ -394,15 +384,19 @@ static int udp_register_app(struct ip_vs_app *inc)
static void
udp_unregister_app(struct ip_vs_app *inc)
{
- spin_lock_bh(&udp_app_lock);
- atomic_dec(&ip_vs_protocol_udp.appcnt);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+
+ spin_lock_bh(&ipvs->udp_app_lock);
+ atomic_dec(&pd->pp->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&udp_app_lock);
+ spin_unlock_bh(&ipvs->udp_app_lock);
}
static int udp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -414,12 +408,12 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = udp_app_hashkey(cp->vport);
- spin_lock(&udp_app_lock);
- list_for_each_entry(inc, &udp_apps[hash], p_list) {
+ spin_lock(&ipvs->udp_app_lock);
+ list_for_each_entry(inc, &ipvs->udp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -436,14 +430,14 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&udp_app_lock);
+ spin_unlock(&ipvs->udp_app_lock);
out:
return result;
}
-static int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
+static const int udp_timeouts[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_NORMAL] = 5*60*HZ,
[IP_VS_UDP_S_LAST] = 2*HZ,
};
@@ -453,14 +447,6 @@ static const char *const udp_state_name_table[IP_VS_UDP_S_LAST+1] = {
[IP_VS_UDP_S_LAST] = "BUG!",
};
-
-static int
-udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST,
- udp_state_name_table, sname, to);
-}
-
static const char * udp_state_name(int state)
{
if (state >= IP_VS_UDP_S_LAST)
@@ -473,18 +459,31 @@ udp_state_transition(struct ip_vs_conn *cp, int direction,
const struct sk_buff *skb,
struct ip_vs_protocol *pp)
{
- cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL];
+ struct ip_vs_proto_data *pd; /* Temp fix, pp will be replaced by pd */
+
+ pd = ip_vs_proto_data_get(&init_net, IPPROTO_UDP);
+ if (unlikely(!pd)) {
+ pr_err("UDP no ns data\n");
+ return 0;
+ }
+
+ cp->timeout = pd->timeout_table[IP_VS_UDP_S_NORMAL];
return 1;
}
-static void udp_init(struct ip_vs_protocol *pp)
+static void __udp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(udp_apps);
- pp->timeout_table = udp_timeouts;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ ip_vs_init_hash_table(ipvs->udp_apps, UDP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->udp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)udp_timeouts,
+ sizeof(udp_timeouts));
}
-static void udp_exit(struct ip_vs_protocol *pp)
+static void __udp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -493,8 +492,10 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.protocol = IPPROTO_UDP,
.num_states = IP_VS_UDP_S_LAST,
.dont_defrag = 0,
- .init = udp_init,
- .exit = udp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __udp_init,
+ .exit_netns = __udp_exit,
.conn_schedule = udp_conn_schedule,
.conn_in_get = ip_vs_conn_in_get_proto,
.conn_out_get = ip_vs_conn_out_get_proto,
@@ -508,5 +509,4 @@ struct ip_vs_protocol ip_vs_protocol_udp = {
.app_conn_bind = udp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = NULL,
- .set_state_timeout = udp_set_state_timeout,
};
--
1.7.2.3
^ permalink raw reply related
* [PATCH 39/79] IPVS: netns preparation for proto_tcp
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
In this phase (one), all local vars will be moved to ipvs struct.
Remaining work, add param struct net *net to a couple of
functions that is common for all protos and use all
ip_vs_proto_data
*v3
Removed unused function as sugested by Simon
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 2 +-
include/net/netns/ip_vs.h | 8 +++
net/netfilter/ipvs/ip_vs_ftp.c | 8 ++-
net/netfilter/ipvs/ip_vs_proto.c | 13 ++++-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 97 ++++++++++++++++++----------------
5 files changed, 79 insertions(+), 49 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 88d4e40..3c45a00 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -807,7 +807,7 @@ extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp);
extern const char * ip_vs_state_name(__u16 proto, int state);
-extern void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp);
+extern void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp);
extern int ip_vs_check_template(struct ip_vs_conn *ct);
extern void ip_vs_random_dropentry(void);
extern int ip_vs_conn_init(void);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 6f4e089..ac77363 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -31,6 +31,14 @@ struct netns_ipvs {
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
+ /* ip_vs_proto_tcp */
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ #define TCP_APP_TAB_BITS 4
+ #define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
+ #define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
+ struct list_head tcp_apps[TCP_APP_TAB_SIZE];
+ spinlock_t tcp_app_lock;
+#endif
/* ip_vs_lblc */
int sysctl_lblc_expiration;
diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c
index 0e762f3..b38ae94 100644
--- a/net/netfilter/ipvs/ip_vs_ftp.c
+++ b/net/netfilter/ipvs/ip_vs_ftp.c
@@ -157,6 +157,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
int ret = 0;
enum ip_conntrack_info ctinfo;
struct nf_conn *ct;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -257,8 +258,9 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
* would be adjusted twice.
*/
+ net = skb_net(skb);
cp->app_data = NULL;
- ip_vs_tcp_conn_listen(n_cp);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return ret;
}
@@ -287,6 +289,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
union nf_inet_addr to;
__be16 port;
struct ip_vs_conn *n_cp;
+ struct net *net;
#ifdef CONFIG_IP_VS_IPV6
/* This application helper doesn't work with IPv6 yet,
@@ -378,7 +381,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
/*
* Move tunnel to listen state
*/
- ip_vs_tcp_conn_listen(n_cp);
+ net = skb_net(skb);
+ ip_vs_tcp_conn_listen(net, n_cp);
ip_vs_conn_put(n_cp);
return 1;
diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c
index 576e296..320c6a6 100644
--- a/net/netfilter/ipvs/ip_vs_proto.c
+++ b/net/netfilter/ipvs/ip_vs_proto.c
@@ -307,12 +307,23 @@ ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
*/
static int __net_init __ip_vs_protocol_init(struct net *net)
{
+#ifdef CONFIG_IP_VS_PROTO_TCP
+ register_ip_vs_proto_netns(net, &ip_vs_protocol_tcp);
+#endif
return 0;
}
static void __net_exit __ip_vs_protocol_cleanup(struct net *net)
{
- /* empty */
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ struct ip_vs_proto_data *pd;
+ int i;
+
+ /* unregister all the ipvs proto data for this netns */
+ for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) {
+ while ((pd = ipvs->proto_data_table[i]) != NULL)
+ unregister_ip_vs_proto_netns(net, pd);
+ }
}
static struct pernet_operations ipvs_proto_ops = {
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index c175d31..9d9df3d 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -9,8 +9,12 @@
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
- * Changes:
+ * Changes: Hans Schillstrom <hans.schillstrom@ericsson.com>
*
+ * Network name space (netns) aware.
+ * Global data moved to netns i.e struct netns_ipvs
+ * tcp_timeouts table has copy per netns in a hash table per
+ * protocol ip_vs_proto_data and is handled by netns
*/
#define KMSG_COMPONENT "IPVS"
@@ -345,7 +349,7 @@ static const int tcp_state_off[IP_VS_DIR_LAST] = {
/*
* Timeout table[state]
*/
-static int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
+static const int tcp_timeouts[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_NONE] = 2*HZ,
[IP_VS_TCP_S_ESTABLISHED] = 15*60*HZ,
[IP_VS_TCP_S_SYN_SENT] = 2*60*HZ,
@@ -460,13 +464,6 @@ static void tcp_timeout_change(struct ip_vs_protocol *pp, int flags)
tcp_state_table = (on? tcp_states_dos : tcp_states);
}
-static int
-tcp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to)
-{
- return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_TCP_S_LAST,
- tcp_state_name_table, sname, to);
-}
-
static inline int tcp_state_idx(struct tcphdr *th)
{
if (th->rst)
@@ -487,6 +484,7 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
int state_idx;
int new_state = IP_VS_TCP_S_CLOSE;
int state_off = tcp_state_off[direction];
+ struct ip_vs_proto_data *pd; /* Temp fix */
/*
* Update state offset to INPUT_ONLY if necessary
@@ -542,10 +540,13 @@ set_tcp_state(struct ip_vs_protocol *pp, struct ip_vs_conn *cp,
}
}
- cp->timeout = pp->timeout_table[cp->state = new_state];
+ pd = ip_vs_proto_data_get(&init_net, pp->protocol);
+ if (likely(pd))
+ cp->timeout = pd->timeout_table[cp->state = new_state];
+ else /* What to do ? */
+ cp->timeout = tcp_timeouts[cp->state = new_state];
}
-
/*
* Handle state transitions
*/
@@ -573,17 +574,6 @@ tcp_state_transition(struct ip_vs_conn *cp, int direction,
return 1;
}
-
-/*
- * Hash table for TCP application incarnations
- */
-#define TCP_APP_TAB_BITS 4
-#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
-#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
-
-static struct list_head tcp_apps[TCP_APP_TAB_SIZE];
-static DEFINE_SPINLOCK(tcp_app_lock);
-
static inline __u16 tcp_app_hashkey(__be16 port)
{
return (((__force u16)port >> TCP_APP_TAB_BITS) ^ (__force u16)port)
@@ -597,21 +587,23 @@ static int tcp_register_app(struct ip_vs_app *inc)
__u16 hash;
__be16 port = inc->port;
int ret = 0;
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
hash = tcp_app_hashkey(port);
- spin_lock_bh(&tcp_app_lock);
- list_for_each_entry(i, &tcp_apps[hash], p_list) {
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ list_for_each_entry(i, &ipvs->tcp_apps[hash], p_list) {
if (i->port == port) {
ret = -EEXIST;
goto out;
}
}
- list_add(&inc->p_list, &tcp_apps[hash]);
- atomic_inc(&ip_vs_protocol_tcp.appcnt);
+ list_add(&inc->p_list, &ipvs->tcp_apps[hash]);
+ atomic_inc(&pd->pp->appcnt);
out:
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
return ret;
}
@@ -619,16 +611,20 @@ static int tcp_register_app(struct ip_vs_app *inc)
static void
tcp_unregister_app(struct ip_vs_app *inc)
{
- spin_lock_bh(&tcp_app_lock);
- atomic_dec(&ip_vs_protocol_tcp.appcnt);
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(&init_net, IPPROTO_TCP);
+
+ spin_lock_bh(&ipvs->tcp_app_lock);
+ atomic_dec(&pd->pp->appcnt);
list_del(&inc->p_list);
- spin_unlock_bh(&tcp_app_lock);
+ spin_unlock_bh(&ipvs->tcp_app_lock);
}
static int
tcp_app_conn_bind(struct ip_vs_conn *cp)
{
+ struct netns_ipvs *ipvs = net_ipvs(&init_net);
int hash;
struct ip_vs_app *inc;
int result = 0;
@@ -640,12 +636,12 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/* Lookup application incarnations and bind the right one */
hash = tcp_app_hashkey(cp->vport);
- spin_lock(&tcp_app_lock);
- list_for_each_entry(inc, &tcp_apps[hash], p_list) {
+ spin_lock(&ipvs->tcp_app_lock);
+ list_for_each_entry(inc, &ipvs->tcp_apps[hash], p_list) {
if (inc->port == cp->vport) {
if (unlikely(!ip_vs_app_inc_get(inc)))
break;
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->"
"%s:%u to app %s on port %u\n",
@@ -662,7 +658,7 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
goto out;
}
}
- spin_unlock(&tcp_app_lock);
+ spin_unlock(&ipvs->tcp_app_lock);
out:
return result;
@@ -672,24 +668,34 @@ tcp_app_conn_bind(struct ip_vs_conn *cp)
/*
* Set LISTEN timeout. (ip_vs_conn_put will setup timer)
*/
-void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp)
+void ip_vs_tcp_conn_listen(struct net *net, struct ip_vs_conn *cp)
{
+ struct ip_vs_proto_data *pd = ip_vs_proto_data_get(net, IPPROTO_TCP);
+
spin_lock(&cp->lock);
cp->state = IP_VS_TCP_S_LISTEN;
- cp->timeout = ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_LISTEN];
+ cp->timeout = (pd ? pd->timeout_table[IP_VS_TCP_S_LISTEN]
+ : tcp_timeouts[IP_VS_TCP_S_LISTEN]);
spin_unlock(&cp->lock);
}
-
-static void ip_vs_tcp_init(struct ip_vs_protocol *pp)
+/* ---------------------------------------------
+ * timeouts is netns related now.
+ * ---------------------------------------------
+ */
+static void __ip_vs_tcp_init(struct net *net, struct ip_vs_proto_data *pd)
{
- IP_VS_INIT_HASH_TABLE(tcp_apps);
- pp->timeout_table = tcp_timeouts;
-}
+ struct netns_ipvs *ipvs = net_ipvs(net);
+ ip_vs_init_hash_table(ipvs->tcp_apps, TCP_APP_TAB_SIZE);
+ spin_lock_init(&ipvs->tcp_app_lock);
+ pd->timeout_table = ip_vs_create_timeout_table((int *)tcp_timeouts,
+ sizeof(tcp_timeouts));
+}
-static void ip_vs_tcp_exit(struct ip_vs_protocol *pp)
+static void __ip_vs_tcp_exit(struct net *net, struct ip_vs_proto_data *pd)
{
+ kfree(pd->timeout_table);
}
@@ -699,8 +705,10 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.num_states = IP_VS_TCP_S_LAST,
.dont_defrag = 0,
.appcnt = ATOMIC_INIT(0),
- .init = ip_vs_tcp_init,
- .exit = ip_vs_tcp_exit,
+ .init = NULL,
+ .exit = NULL,
+ .init_netns = __ip_vs_tcp_init,
+ .exit_netns = __ip_vs_tcp_exit,
.register_app = tcp_register_app,
.unregister_app = tcp_unregister_app,
.conn_schedule = tcp_conn_schedule,
@@ -714,5 +722,4 @@ struct ip_vs_protocol ip_vs_protocol_tcp = {
.app_conn_bind = tcp_app_conn_bind,
.debug_packet = ip_vs_tcpudp_debug_packet,
.timeout_change = tcp_timeout_change,
- .set_state_timeout = tcp_set_state_timeout,
};
--
1.7.2.3
^ permalink raw reply related
* [PATCH 37/79] IPVS: netns awarness to lblc sheduler
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
var sysctl_ip_vs_lblc_expiration moved to ipvs struct as
sysctl_lblc_expiration
procfs updated to handle this.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 4 +++
net/netfilter/ipvs/ip_vs_lblc.c | 50 ++++++++++++++++++++++++++------------
2 files changed, 38 insertions(+), 16 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 51a92ee..d14581c 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -29,6 +29,10 @@ struct netns_ipvs {
struct list_head rs_table[IP_VS_RTAB_SIZE];
+ /* ip_vs_lblc */
+ int sysctl_lblc_expiration;
+ struct ctl_table_header *lblc_ctl_header;
+ struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c
index 84278fb..d5bec33 100644
--- a/net/netfilter/ipvs/ip_vs_lblc.c
+++ b/net/netfilter/ipvs/ip_vs_lblc.c
@@ -70,7 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ;
/*
@@ -117,7 +116,7 @@ struct ip_vs_lblc_table {
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
- .data = &sysctl_ip_vs_lblc_expiration,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -125,8 +124,6 @@ static ctl_table vs_vars_table[] = {
{ }
};
-static struct ctl_table_header * sysctl_header;
-
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
list_del(&en->list);
@@ -248,6 +245,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
@@ -255,7 +253,8 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
- en->lastuse + sysctl_ip_vs_lblc_expiration))
+ en->lastuse +
+ ipvs->sysctl_lblc_expiration))
continue;
ip_vs_lblc_free(en);
@@ -548,23 +547,43 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
*/
static int __net_init __ip_vs_lblc_init(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return -EPERM;
-
- sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
- vs_vars_table);
- if (!sysctl_header)
- return -ENOMEM;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblc_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblc_ctl_table == NULL)
+ goto err_dup;
+ } else
+ ipvs->lblc_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
+ ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
+
+ ipvs->lblc_ctl_header =
+ register_net_sysctl_table(net, net_vs_ctl_path,
+ ipvs->lblc_ctl_table);
+ if (!ipvs->lblc_ctl_header)
+ goto err_reg;
return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
+
+err_dup:
+ return -ENOMEM;
}
static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblc_ctl_header);
- unregister_net_sysctl_table(sysctl_header);
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblc_ctl_table);
}
static struct pernet_operations ip_vs_lblc_ops = {
@@ -586,7 +605,6 @@ static int __init ip_vs_lblc_init(void)
return ret;
}
-
static void __exit ip_vs_lblc_cleanup(void)
{
unregister_ip_vs_scheduler(&ip_vs_lblc_scheduler);
--
1.7.2.3
^ permalink raw reply related
* [PATCH 36/79] IPVS: netns awarness to lblcr sheduler
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
var sysctl_ip_vs_lblcr_expiration moved to ipvs struct as
sysctl_lblcr_expiration
procfs updated to handle this.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/netns/ip_vs.h | 5 +++
net/netfilter/ipvs/ip_vs_lblcr.c | 54 +++++++++++++++++++++++++------------
2 files changed, 41 insertions(+), 18 deletions(-)
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 5b87d22..51a92ee 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -28,6 +28,11 @@ struct netns_ipvs {
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
+
+ /* ip_vs_lblcr */
+ int sysctl_lblcr_expiration;
+ struct ctl_table_header *lblcr_ctl_header;
+ struct ctl_table *lblcr_ctl_table;
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c
index 7c7396a..61ae8cf 100644
--- a/net/netfilter/ipvs/ip_vs_lblcr.c
+++ b/net/netfilter/ipvs/ip_vs_lblcr.c
@@ -70,8 +70,6 @@
* entries that haven't been touched for a day.
*/
#define COUNT_FOR_FULL_EXPIRATION 30
-static int sysctl_ip_vs_lblcr_expiration = 24*60*60*HZ;
-
/*
* for IPVS lblcr entry hash table
@@ -296,7 +294,7 @@ struct ip_vs_lblcr_table {
static ctl_table vs_vars_table[] = {
{
.procname = "lblcr_expiration",
- .data = &sysctl_ip_vs_lblcr_expiration,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
@@ -304,8 +302,6 @@ static ctl_table vs_vars_table[] = {
{ }
};
-static struct ctl_table_header * sysctl_header;
-
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
list_del(&en->list);
@@ -425,14 +421,15 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- if (time_after(en->lastuse+sysctl_ip_vs_lblcr_expiration,
- now))
+ if (time_after(en->lastuse
+ + ipvs->sysctl_lblcr_expiration, now))
continue;
ip_vs_lblcr_free(en);
@@ -664,6 +661,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
@@ -675,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
- sysctl_ip_vs_lblcr_expiration)) {
+ ipvs->sysctl_lblcr_expiration)) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
@@ -749,23 +747,43 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
*/
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return -EPERM;
-
- sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
- vs_vars_table);
- if (!sysctl_header)
- return -ENOMEM;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ if (!net_eq(net, &init_net)) {
+ ipvs->lblcr_ctl_table = kmemdup(vs_vars_table,
+ sizeof(vs_vars_table),
+ GFP_KERNEL);
+ if (ipvs->lblcr_ctl_table == NULL)
+ goto err_dup;
+ } else
+ ipvs->lblcr_ctl_table = vs_vars_table;
+ ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
+ ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
+
+ ipvs->lblcr_ctl_header =
+ register_net_sysctl_table(net, net_vs_ctl_path,
+ ipvs->lblcr_ctl_table);
+ if (!ipvs->lblcr_ctl_header)
+ goto err_reg;
return 0;
+
+err_reg:
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
+
+err_dup:
+ return -ENOMEM;
}
static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
- if (!net_eq(net, &init_net)) /* netns not enabled yet */
- return;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
+ unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
- unregister_net_sysctl_table(sysctl_header);
+ if (!net_eq(net, &init_net))
+ kfree(ipvs->lblcr_ctl_table);
}
static struct pernet_operations ip_vs_lblcr_ops = {
--
1.7.2.3
^ permalink raw reply related
* [PATCH 35/79] IPVS: netns to services part 1
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Services hash tables got netns ptr a hash arg,
While Real Servers (rs) has been moved to ipvs struct.
Two new inline functions added to get net ptr from skb.
Since ip_vs is called from different contexts there is two
places to dig for the net ptr skb->dev or skb->sk
this is handled in skb_net() and skb_sknet()
Global functions, ip_vs_service_get() ip_vs_lookup_real_service()
etc have got struct net *net as first param.
If possible get net ptr skb etc,
- if not &init_net is used at this early stage of patching.
ip_vs_ctl.c procfs not ready for netns yet.
*v3
Comments by Julian
- __ip_vs_service_find and __ip_vs_svc_fwm_find are fast path,
net_eq(svc->net, net) so the check is at the end now.
- net = skb_net(skb) in ip_vs_out moved after check for skb_dst.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
include/net/ip_vs.h | 64 +++++++++-
include/net/netns/ip_vs.h | 8 +
net/netfilter/ipvs/ip_vs_conn.c | 2 +-
net/netfilter/ipvs/ip_vs_core.c | 4 +-
net/netfilter/ipvs/ip_vs_ctl.c | 232 +++++++++++++++++++--------------
net/netfilter/ipvs/ip_vs_proto_sctp.c | 5 +-
net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +-
net/netfilter/ipvs/ip_vs_proto_udp.c | 5 +-
net/netfilter/ipvs/ip_vs_sync.c | 2 +-
9 files changed, 214 insertions(+), 115 deletions(-)
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index c1c2ece..d551e0d 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -37,6 +37,59 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
{
return net->ipvs;
}
+/*
+ * Get net ptr from skb in traffic cases
+ * use skb_sknet when call is from userland (ioctl or netlink)
+ */
+static inline struct net *skb_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+ /*
+ * This is used for debug only.
+ * Start with the most likely hit
+ * End with BUG
+ */
+ if (likely(skb->dev && skb->dev->nd_net))
+ return dev_net(skb->dev);
+ if (skb_dst(skb)->dev)
+ return dev_net(skb_dst(skb)->dev);
+ WARN(skb->sk, "Maybe skb_sknet should be used in %s() at line:%d\n",
+ __func__, __LINE__);
+ if (likely(skb->sk && skb->sk->sk_net))
+ return sock_net(skb->sk);
+ pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+ __func__, __LINE__);
+ BUG();
+#else
+ return dev_net(skb->dev ? : skb_dst(skb)->dev);
+#endif
+#else
+ return &init_net;
+#endif
+}
+
+static inline struct net *skb_sknet(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+#ifdef CONFIG_IP_VS_DEBUG
+ /* Start with the most likely hit */
+ if (likely(skb->sk && skb->sk->sk_net))
+ return sock_net(skb->sk);
+ WARN(skb->dev, "Maybe skb_net should be used instead in %s() line:%d\n",
+ __func__, __LINE__);
+ if (likely(skb->dev && skb->dev->nd_net))
+ return dev_net(skb->dev);
+ pr_err("There is no net ptr to find in the skb in %s() line:%d\n",
+ __func__, __LINE__);
+ BUG();
+#else
+ return sock_net(skb->sk);
+#endif
+#else
+ return &init_net;
+#endif
+}
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
@@ -496,6 +549,7 @@ struct ip_vs_service {
unsigned flags; /* service status flags */
unsigned timeout; /* persistent timeout in ticks */
__be32 netmask; /* grouping granularity */
+ struct net *net;
struct list_head destinations; /* real server d-linked list */
__u32 num_dests; /* number of servers */
@@ -896,7 +950,7 @@ extern int sysctl_ip_vs_sync_ver;
extern void ip_vs_sync_switch_mode(int mode);
extern struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport);
static inline void ip_vs_service_put(struct ip_vs_service *svc)
@@ -905,7 +959,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc)
}
extern struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr, __be16 dport);
extern int ip_vs_use_count_inc(void);
@@ -913,9 +967,9 @@ extern void ip_vs_use_count_dec(void);
extern int ip_vs_control_init(void);
extern void ip_vs_control_cleanup(void);
extern struct ip_vs_dest *
-ip_vs_find_dest(int af, const union nf_inet_addr *daddr, __be16 dport,
- const union nf_inet_addr *vaddr, __be16 vport, __u16 protocol,
- __u32 fwmark);
+ip_vs_find_dest(struct net *net, int af, const union nf_inet_addr *daddr,
+ __be16 dport, const union nf_inet_addr *vaddr, __be16 vport,
+ __u16 protocol, __u32 fwmark);
extern struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp);
diff --git a/include/net/netns/ip_vs.h b/include/net/netns/ip_vs.h
index 12fe840..5b87d22 100644
--- a/include/net/netns/ip_vs.h
+++ b/include/net/netns/ip_vs.h
@@ -20,6 +20,14 @@ struct ctl_table_header;
struct netns_ipvs {
int gen; /* Generation */
+ /*
+ * Hash table: for real service lookups
+ */
+ #define IP_VS_RTAB_BITS 4
+ #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
+ #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
+
+ struct list_head rs_table[IP_VS_RTAB_SIZE];
};
#endif /* IP_VS_H_ */
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 7c1b502..7a0e79e 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -611,7 +611,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
struct ip_vs_dest *dest;
if ((cp) && (!cp->dest)) {
- dest = ip_vs_find_dest(cp->af, &cp->daddr, cp->dport,
+ dest = ip_vs_find_dest(&init_net, cp->af, &cp->daddr, cp->dport,
&cp->vaddr, cp->vport,
cp->protocol, cp->fwmark);
ip_vs_bind_dest(cp, dest);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 206f40c..d0616ea 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1031,6 +1031,7 @@ drop:
static unsigned int
ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
{
+ struct net *net = NULL;
struct ip_vs_iphdr iph;
struct ip_vs_protocol *pp;
struct ip_vs_conn *cp;
@@ -1054,6 +1055,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
if (unlikely(!skb_dst(skb)))
return NF_ACCEPT;
+ net = skb_net(skb);
ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
@@ -1119,7 +1121,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
sizeof(_ports), _ports);
if (pptr == NULL)
return NF_ACCEPT; /* Not for me */
- if (ip_vs_lookup_real_service(af, iph.protocol,
+ if (ip_vs_lookup_real_service(net, af, iph.protocol,
&iph.saddr,
pptr[0])) {
/*
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ceeef43..2d7c96b 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -288,15 +288,6 @@ static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
/*
- * Hash table: for real service lookups
- */
-#define IP_VS_RTAB_BITS 4
-#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
-#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
-
-static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
-
-/*
* Trash for destinations
*/
static LIST_HEAD(ip_vs_dest_trash);
@@ -311,9 +302,9 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
/*
* Returns hash value for virtual service
*/
-static __inline__ unsigned
-ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
- __be16 port)
+static inline unsigned
+ip_vs_svc_hashkey(struct net *net, int af, unsigned proto,
+ const union nf_inet_addr *addr, __be16 port)
{
register unsigned porth = ntohs(port);
__be32 addr_fold = addr->ip;
@@ -323,6 +314,7 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
addr_fold = addr->ip6[0]^addr->ip6[1]^
addr->ip6[2]^addr->ip6[3];
#endif
+ addr_fold ^= ((size_t)net>>8);
return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
& IP_VS_SVC_TAB_MASK;
@@ -331,13 +323,13 @@ ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
/*
* Returns hash value of fwmark for virtual service lookup
*/
-static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
+static inline unsigned ip_vs_svc_fwm_hashkey(struct net *net, __u32 fwmark)
{
- return fwmark & IP_VS_SVC_TAB_MASK;
+ return (((size_t)net>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK;
}
/*
- * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
+ * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port>
* or in the ip_vs_svc_fwm_table by fwmark.
* Should be called with locked tables.
*/
@@ -353,16 +345,16 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
if (svc->fwmark == 0) {
/*
- * Hash it by <protocol,addr,port> in ip_vs_svc_table
+ * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table
*/
- hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
- svc->port);
+ hash = ip_vs_svc_hashkey(svc->net, svc->af, svc->protocol,
+ &svc->addr, svc->port);
list_add(&svc->s_list, &ip_vs_svc_table[hash]);
} else {
/*
- * Hash it by fwmark in ip_vs_svc_fwm_table
+ * Hash it by fwmark in svc_fwm_table
*/
- hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
+ hash = ip_vs_svc_fwm_hashkey(svc->net, svc->fwmark);
list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
}
@@ -374,7 +366,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc)
/*
- * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
+ * Unhashes a service from svc_table / svc_fwm_table.
* Should be called with locked tables.
*/
static int ip_vs_svc_unhash(struct ip_vs_service *svc)
@@ -386,10 +378,10 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
}
if (svc->fwmark == 0) {
- /* Remove it from the ip_vs_svc_table table */
+ /* Remove it from the svc_table table */
list_del(&svc->s_list);
} else {
- /* Remove it from the ip_vs_svc_fwm_table table */
+ /* Remove it from the svc_fwm_table table */
list_del(&svc->f_list);
}
@@ -400,23 +392,24 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc)
/*
- * Get service by {proto,addr,port} in the service table.
+ * Get service by {netns, proto,addr,port} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
- __be16 vport)
+__ip_vs_service_find(struct net *net, int af, __u16 protocol,
+ const union nf_inet_addr *vaddr, __be16 vport)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for "full" addressed entries */
- hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
+ hash = ip_vs_svc_hashkey(net, af, protocol, vaddr, vport);
list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
if ((svc->af == af)
&& ip_vs_addr_equal(af, &svc->addr, vaddr)
&& (svc->port == vport)
- && (svc->protocol == protocol)) {
+ && (svc->protocol == protocol)
+ && net_eq(svc->net, net)) {
/* HIT */
return svc;
}
@@ -430,16 +423,17 @@ __ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
* Get service by {fwmark} in the service table.
*/
static inline struct ip_vs_service *
-__ip_vs_svc_fwm_find(int af, __u32 fwmark)
+__ip_vs_svc_fwm_find(struct net *net, int af, __u32 fwmark)
{
unsigned hash;
struct ip_vs_service *svc;
/* Check for fwmark addressed entries */
- hash = ip_vs_svc_fwm_hashkey(fwmark);
+ hash = ip_vs_svc_fwm_hashkey(net, fwmark);
list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
- if (svc->fwmark == fwmark && svc->af == af) {
+ if (svc->fwmark == fwmark && svc->af == af
+ && net_eq(svc->net, net)) {
/* HIT */
return svc;
}
@@ -449,7 +443,7 @@ __ip_vs_svc_fwm_find(int af, __u32 fwmark)
}
struct ip_vs_service *
-ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
+ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
const union nf_inet_addr *vaddr, __be16 vport)
{
struct ip_vs_service *svc;
@@ -459,14 +453,15 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
/*
* Check the table hashed by fwmark first
*/
- if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
+ svc = __ip_vs_svc_fwm_find(net, af, fwmark);
+ if (fwmark && svc)
goto out;
/*
* Check the table hashed by <protocol,addr,port>
* for "full" addressed entries
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, vport);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, vport);
if (svc == NULL
&& protocol == IPPROTO_TCP
@@ -476,7 +471,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
* Check if ftp service entry exists, the packet
* might belong to FTP data connections.
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, FTPPORT);
}
if (svc == NULL
@@ -484,7 +479,7 @@ ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
/*
* Check if the catch-all port (port zero) exists
*/
- svc = __ip_vs_service_find(af, protocol, vaddr, 0);
+ svc = __ip_vs_service_find(net, af, protocol, vaddr, 0);
}
out:
@@ -545,10 +540,10 @@ static inline unsigned ip_vs_rs_hashkey(int af,
}
/*
- * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
+ * Hashes ip_vs_dest in rs_table by <proto,addr,port>.
* should be called with locked tables.
*/
-static int ip_vs_rs_hash(struct ip_vs_dest *dest)
+static int ip_vs_rs_hash(struct netns_ipvs *ipvs, struct ip_vs_dest *dest)
{
unsigned hash;
@@ -562,19 +557,19 @@ static int ip_vs_rs_hash(struct ip_vs_dest *dest)
*/
hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
- list_add(&dest->d_list, &ip_vs_rtable[hash]);
+ list_add(&dest->d_list, &ipvs->rs_table[hash]);
return 1;
}
/*
- * UNhashes ip_vs_dest from ip_vs_rtable.
+ * UNhashes ip_vs_dest from rs_table.
* should be called with locked tables.
*/
static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
{
/*
- * Remove it from the ip_vs_rtable table.
+ * Remove it from the rs_table table.
*/
if (!list_empty(&dest->d_list)) {
list_del(&dest->d_list);
@@ -588,10 +583,11 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
* Lookup real service by <proto,addr,port> in the real service table.
*/
struct ip_vs_dest *
-ip_vs_lookup_real_service(int af, __u16 protocol,
+ip_vs_lookup_real_service(struct net *net, int af, __u16 protocol,
const union nf_inet_addr *daddr,
__be16 dport)
{
+ struct netns_ipvs *ipvs = net_ipvs(net);
unsigned hash;
struct ip_vs_dest *dest;
@@ -602,7 +598,7 @@ ip_vs_lookup_real_service(int af, __u16 protocol,
hash = ip_vs_rs_hashkey(af, daddr, dport);
read_lock(&__ip_vs_rs_lock);
- list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
+ list_for_each_entry(dest, &ipvs->rs_table[hash], d_list) {
if ((dest->af == af)
&& ip_vs_addr_equal(af, &dest->addr, daddr)
&& (dest->port == dport)
@@ -652,7 +648,8 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
* ip_vs_lookup_real_service() looked promissing, but
* seems not working as expected.
*/
-struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
+struct ip_vs_dest *ip_vs_find_dest(struct net *net, int af,
+ const union nf_inet_addr *daddr,
__be16 dport,
const union nf_inet_addr *vaddr,
__be16 vport, __u16 protocol, __u32 fwmark)
@@ -660,7 +657,7 @@ struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
struct ip_vs_dest *dest;
struct ip_vs_service *svc;
- svc = ip_vs_service_get(af, fwmark, protocol, vaddr, vport);
+ svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);
if (!svc)
return NULL;
dest = ip_vs_lookup_dest(svc, daddr, dport);
@@ -768,6 +765,7 @@ static void
__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
struct ip_vs_dest_user_kern *udest, int add)
{
+ struct netns_ipvs *ipvs = net_ipvs(svc->net);
int conn_flags;
/* set the weight and the flags */
@@ -780,11 +778,11 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
conn_flags |= IP_VS_CONN_F_NOOUTPUT;
} else {
/*
- * Put the real service in ip_vs_rtable if not present.
+ * Put the real service in rs_table if not present.
* For now only for NAT!
*/
write_lock_bh(&__ip_vs_rs_lock);
- ip_vs_rs_hash(dest);
+ ip_vs_rs_hash(ipvs, dest);
write_unlock_bh(&__ip_vs_rs_lock);
}
atomic_set(&dest->conn_flags, conn_flags);
@@ -1117,7 +1115,7 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
* Add a service into the service hash table
*/
static int
-ip_vs_add_service(struct ip_vs_service_user_kern *u,
+ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
struct ip_vs_service **svc_p)
{
int ret = 0;
@@ -1172,6 +1170,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u,
svc->flags = u->flags;
svc->timeout = u->timeout * HZ;
svc->netmask = u->netmask;
+ svc->net = net;
INIT_LIST_HEAD(&svc->destinations);
rwlock_init(&svc->sched_lock);
@@ -1428,17 +1427,19 @@ static int ip_vs_del_service(struct ip_vs_service *svc)
/*
* Flush all the virtual services
*/
-static int ip_vs_flush(void)
+static int ip_vs_flush(struct net *net)
{
int idx;
struct ip_vs_service *svc, *nxt;
/*
- * Flush the service table hashed by <protocol,addr,port>
+ * Flush the service table hashed by <netns,protocol,addr,port>
*/
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
- ip_vs_unlink_service(svc);
+ list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx],
+ s_list) {
+ if (net_eq(svc->net, net))
+ ip_vs_unlink_service(svc);
}
}
@@ -1448,7 +1449,8 @@ static int ip_vs_flush(void)
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry_safe(svc, nxt,
&ip_vs_svc_fwm_table[idx], f_list) {
- ip_vs_unlink_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_unlink_service(svc);
}
}
@@ -1472,20 +1474,22 @@ static int ip_vs_zero_service(struct ip_vs_service *svc)
return 0;
}
-static int ip_vs_zero_all(void)
+static int ip_vs_zero_all(struct net *net)
{
int idx;
struct ip_vs_service *svc;
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- ip_vs_zero_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_zero_service(svc);
}
}
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- ip_vs_zero_service(svc);
+ if (net_eq(svc->net, net))
+ ip_vs_zero_service(svc);
}
}
@@ -1763,6 +1767,7 @@ static struct ctl_table_header * sysctl_header;
#ifdef CONFIG_PROC_FS
struct ip_vs_iter {
+ struct seq_net_private p; /* Do not move this, netns depends upon it*/
struct list_head *table;
int bucket;
};
@@ -1789,6 +1794,7 @@ static inline const char *ip_vs_fwd_name(unsigned flags)
/* Get the Nth entry in the two lists */
static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
{
+ struct net *net = seq_file_net(seq);
struct ip_vs_iter *iter = seq->private;
int idx;
struct ip_vs_service *svc;
@@ -1796,7 +1802,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* look in hash by protocol */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
- if (pos-- == 0){
+ if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_table;
iter->bucket = idx;
return svc;
@@ -1807,7 +1813,7 @@ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
/* keep looking in fwmark */
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
- if (pos-- == 0) {
+ if (net_eq(svc->net, net) && pos-- == 0) {
iter->table = ip_vs_svc_fwm_table;
iter->bucket = idx;
return svc;
@@ -1961,7 +1967,7 @@ static const struct seq_operations ip_vs_info_seq_ops = {
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
- return seq_open_private(file, &ip_vs_info_seq_ops,
+ return seq_open_net(inode, file, &ip_vs_info_seq_ops,
sizeof(struct ip_vs_iter));
}
@@ -2011,7 +2017,7 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
- return single_open(file, ip_vs_stats_show, NULL);
+ return single_open_net(inode, file, ip_vs_stats_show);
}
static const struct file_operations ip_vs_stats_fops = {
@@ -2113,6 +2119,7 @@ static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
static int
do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
+ struct net *net = sock_net(sk);
int ret;
unsigned char arg[MAX_ARG_LEN];
struct ip_vs_service_user *usvc_compat;
@@ -2147,7 +2154,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_FLUSH) {
/* Flush the virtual service */
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out_unlock;
} else if (cmd == IP_VS_SO_SET_TIMEOUT) {
/* Set timeout values for (tcp tcpfin udp) */
@@ -2174,7 +2181,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (cmd == IP_VS_SO_SET_ZERO) {
/* if no service address is set, zero counters in all */
if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out_unlock;
}
}
@@ -2191,10 +2198,10 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
/* Lookup the exact service by <protocol, addr, port> or fwmark */
if (usvc.fwmark == 0)
- svc = __ip_vs_service_find(usvc.af, usvc.protocol,
+ svc = __ip_vs_service_find(net, usvc.af, usvc.protocol,
&usvc.addr, usvc.port);
else
- svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc.af, usvc.fwmark);
if (cmd != IP_VS_SO_SET_ADD
&& (svc == NULL || svc->protocol != usvc.protocol)) {
@@ -2207,7 +2214,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
if (svc != NULL)
ret = -EEXIST;
else
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
break;
case IP_VS_SO_SET_EDIT:
ret = ip_vs_edit_service(svc, &usvc);
@@ -2267,7 +2274,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
}
static inline int
-__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
+__ip_vs_get_service_entries(struct net *net,
+ const struct ip_vs_get_services *get,
struct ip_vs_get_services __user *uptr)
{
int idx, count=0;
@@ -2278,7 +2286,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET)
+ if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
if (count >= get->num_services)
@@ -2297,7 +2305,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
/* Only expose IPv4 entries to old interface */
- if (svc->af != AF_INET)
+ if (svc->af != AF_INET || !net_eq(svc->net, net))
continue;
if (count >= get->num_services)
@@ -2317,7 +2325,7 @@ __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
}
static inline int
-__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
+__ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
struct ip_vs_get_dests __user *uptr)
{
struct ip_vs_service *svc;
@@ -2325,9 +2333,9 @@ __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
int ret = 0;
if (get->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET, get->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
+ svc = __ip_vs_service_find(net, AF_INET, get->protocol, &addr,
get->port);
if (svc) {
@@ -2401,7 +2409,9 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
unsigned char arg[128];
int ret = 0;
unsigned int copylen;
+ struct net *net = sock_net(sk);
+ BUG_ON(!net);
if (!capable(CAP_NET_ADMIN))
return -EPERM;
@@ -2463,7 +2473,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_service_entries(get, user);
+ ret = __ip_vs_get_service_entries(net, get, user);
}
break;
@@ -2476,10 +2486,11 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
entry = (struct ip_vs_service_entry *)arg;
addr.ip = entry->addr;
if (entry->fwmark)
- svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, AF_INET, entry->fwmark);
else
- svc = __ip_vs_service_find(AF_INET, entry->protocol,
- &addr, entry->port);
+ svc = __ip_vs_service_find(net, AF_INET,
+ entry->protocol, &addr,
+ entry->port);
if (svc) {
ip_vs_copy_service(entry, svc);
if (copy_to_user(user, entry, sizeof(*entry)) != 0)
@@ -2502,7 +2513,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EINVAL;
goto out;
}
- ret = __ip_vs_get_dest_entries(get, user);
+ ret = __ip_vs_get_dest_entries(net, get, user);
}
break;
@@ -2722,11 +2733,12 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
int idx = 0, i;
int start = cb->args[0];
struct ip_vs_service *svc;
+ struct net *net = skb_sknet(skb);
mutex_lock(&__ip_vs_mutex);
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
- if (++idx <= start)
+ if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2737,7 +2749,7 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb,
for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
- if (++idx <= start)
+ if (++idx <= start || !net_eq(svc->net, net))
continue;
if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
idx--;
@@ -2753,7 +2765,8 @@ nla_put_failure:
return skb->len;
}
-static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
+static int ip_vs_genl_parse_service(struct net *net,
+ struct ip_vs_service_user_kern *usvc,
struct nlattr *nla, int full_entry,
struct ip_vs_service **ret_svc)
{
@@ -2796,9 +2809,9 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
}
if (usvc->fwmark)
- svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
+ svc = __ip_vs_svc_fwm_find(net, usvc->af, usvc->fwmark);
else
- svc = __ip_vs_service_find(usvc->af, usvc->protocol,
+ svc = __ip_vs_service_find(net, usvc->af, usvc->protocol,
&usvc->addr, usvc->port);
*ret_svc = svc;
@@ -2835,13 +2848,14 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
return 0;
}
-static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
+static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
+ struct nlattr *nla)
{
struct ip_vs_service_user_kern usvc;
struct ip_vs_service *svc;
int ret;
- ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
+ ret = ip_vs_genl_parse_service(net, &usvc, nla, 0, &svc);
return ret ? ERR_PTR(ret) : svc;
}
@@ -2909,6 +2923,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
struct ip_vs_service *svc;
struct ip_vs_dest *dest;
struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
+ struct net *net;
mutex_lock(&__ip_vs_mutex);
@@ -2917,7 +2932,8 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb,
IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
goto out_err;
- svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
+ net = skb_sknet(skb);
+ svc = ip_vs_genl_find_service(net, attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc) || svc == NULL)
goto out_err;
@@ -3102,13 +3118,15 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
struct ip_vs_dest_user_kern udest;
int ret = 0, cmd;
int need_full_svc = 0, need_full_dest = 0;
+ struct net *net;
+ net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
mutex_lock(&__ip_vs_mutex);
if (cmd == IPVS_CMD_FLUSH) {
- ret = ip_vs_flush();
+ ret = ip_vs_flush(net);
goto out;
} else if (cmd == IPVS_CMD_SET_CONFIG) {
ret = ip_vs_genl_set_config(info->attrs);
@@ -3133,7 +3151,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
goto out;
} else if (cmd == IPVS_CMD_ZERO &&
!info->attrs[IPVS_CMD_ATTR_SERVICE]) {
- ret = ip_vs_zero_all();
+ ret = ip_vs_zero_all(net);
goto out;
}
@@ -3143,7 +3161,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
need_full_svc = 1;
- ret = ip_vs_genl_parse_service(&usvc,
+ ret = ip_vs_genl_parse_service(net, &usvc,
info->attrs[IPVS_CMD_ATTR_SERVICE],
need_full_svc, &svc);
if (ret)
@@ -3173,7 +3191,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
switch (cmd) {
case IPVS_CMD_NEW_SERVICE:
if (svc == NULL)
- ret = ip_vs_add_service(&usvc, &svc);
+ ret = ip_vs_add_service(net, &usvc, &svc);
else
ret = -EEXIST;
break;
@@ -3211,7 +3229,9 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
struct sk_buff *msg;
void *reply;
int ret, cmd, reply_cmd;
+ struct net *net;
+ net = skb_sknet(skb);
cmd = info->genlhdr->cmd;
if (cmd == IPVS_CMD_GET_SERVICE)
@@ -3240,7 +3260,8 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
{
struct ip_vs_service *svc;
- svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
+ svc = ip_vs_genl_find_service(net,
+ info->attrs[IPVS_CMD_ATTR_SERVICE]);
if (IS_ERR(svc)) {
ret = PTR_ERR(svc);
goto out_err;
@@ -3411,9 +3432,15 @@ static void ip_vs_genl_unregister(void)
*/
int __net_init __ip_vs_control_init(struct net *net)
{
+ int idx;
+ struct netns_ipvs *ipvs = net_ipvs(net);
+
if (!net_eq(net, &init_net)) /* netns not enabled yet */
return -EPERM;
+ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
+ INIT_LIST_HEAD(&ipvs->rs_table[idx]);
+
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
sysctl_header = register_net_sysctl_table(net, net_vs_ctl_path,
@@ -3445,43 +3472,48 @@ static struct pernet_operations ipvs_control_ops = {
int __init ip_vs_control_init(void)
{
- int ret;
int idx;
+ int ret;
EnterFunction(2);
- /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
+ /* Initialize svc_table, ip_vs_svc_fwm_table, rs_table */
for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
}
- for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
- INIT_LIST_HEAD(&ip_vs_rtable[idx]);
+
+ ret = register_pernet_subsys(&ipvs_control_ops);
+ if (ret) {
+ pr_err("cannot register namespace.\n");
+ goto err;
}
- smp_wmb();
+
+ smp_wmb(); /* Do we really need it now ? */
ret = nf_register_sockopt(&ip_vs_sockopts);
if (ret) {
pr_err("cannot register sockopt.\n");
- return ret;
+ goto err_net;
}
ret = ip_vs_genl_register();
if (ret) {
pr_err("cannot register Generic Netlink interface.\n");
nf_unregister_sockopt(&ip_vs_sockopts);
- return ret;
+ goto err_net;
}
- ret = register_pernet_subsys(&ipvs_control_ops);
- if (ret)
- return ret;
-
/* Hook the defense timer */
schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
LeaveFunction(2);
return 0;
+
+err_net:
+ unregister_pernet_subsys(&ipvs_control_ops);
+err:
+ return ret;
}
diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c
index a315159..521b827 100644
--- a/net/netfilter/ipvs/ip_vs_proto_sctp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c
@@ -12,6 +12,7 @@ static int
sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
sctp_chunkhdr_t _schunkh, *sch;
sctp_sctphdr_t *sh, _sctph;
@@ -27,9 +28,9 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
sizeof(_schunkh), &_schunkh);
if (sch == NULL)
return 0;
-
+ net = skb_net(skb);
if ((sch->type == SCTP_CID_INIT) &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, sh->dest))) {
int ignored;
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index 1cdab12..c175d31 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -31,6 +31,7 @@ static int
tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
struct tcphdr _tcph, *th;
struct ip_vs_iphdr iph;
@@ -42,11 +43,11 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
*verdict = NF_DROP;
return 0;
}
-
+ net = skb_net(skb);
/* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */
if (th->syn &&
- (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr,
- th->dest))) {
+ (svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
+ &iph.daddr, th->dest))) {
int ignored;
if (ip_vs_todrop()) {
diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c
index cd398de..5ab54f6 100644
--- a/net/netfilter/ipvs/ip_vs_proto_udp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_udp.c
@@ -31,6 +31,7 @@ static int
udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
int *verdict, struct ip_vs_conn **cpp)
{
+ struct net *net;
struct ip_vs_service *svc;
struct udphdr _udph, *uh;
struct ip_vs_iphdr iph;
@@ -42,8 +43,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
*verdict = NF_DROP;
return 0;
}
-
- svc = ip_vs_service_get(af, skb->mark, iph.protocol,
+ net = skb_net(skb);
+ svc = ip_vs_service_get(net, af, skb->mark, iph.protocol,
&iph.daddr, uh->dest);
if (svc) {
int ignored;
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3668739..662aa2c 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -749,7 +749,7 @@ static void ip_vs_proc_conn(struct ip_vs_conn_param *param, unsigned flags,
* If it is not found the connection will remain unbound
* but still handled.
*/
- dest = ip_vs_find_dest(type, daddr, dport, param->vaddr,
+ dest = ip_vs_find_dest(&init_net, type, daddr, dport, param->vaddr,
param->vport, protocol, fwmark);
/* Set the approprite ativity flag */
--
1.7.2.3
^ permalink raw reply related
* [PATCH 33/79] netfilter: fix compilation when conntrack is disabled but tproxy is enabled
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: KOVACS Krisztian <hidden@balabit.hu>
The IPv6 tproxy patches split IPv6 defragmentation off of conntrack, but
failed to update the #ifdef stanzas guarding the defragmentation related
fields and code in skbuff and conntrack related code in nf_defrag_ipv6.c.
This patch adds the required #ifdefs so that IPv6 tproxy can truly be used
without connection tracking.
Original report:
http://marc.info/?l=linux-netdev&m=129010118516341&w=2
Reported-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: KOVACS Krisztian <hidden@balabit.hu>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
include/linux/skbuff.h | 15 +++++++++++++++
include/net/netfilter/ipv6/nf_conntrack_ipv6.h | 10 ----------
include/net/netfilter/ipv6/nf_defrag_ipv6.h | 10 ++++++++++
net/core/skbuff.c | 2 ++
net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 8 +++++++-
5 files changed, 34 insertions(+), 11 deletions(-)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index e6ba898..4f2db79 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -255,6 +255,11 @@ typedef unsigned int sk_buff_data_t;
typedef unsigned char *sk_buff_data_t;
#endif
+#if defined(CONFIG_NF_DEFRAG_IPV4) || defined(CONFIG_NF_DEFRAG_IPV4_MODULE) || \
+ defined(CONFIG_NF_DEFRAG_IPV6) || defined(CONFIG_NF_DEFRAG_IPV6_MODULE)
+#define NET_SKBUFF_NF_DEFRAG_NEEDED 1
+#endif
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -362,6 +367,8 @@ struct sk_buff {
void (*destructor)(struct sk_buff *skb);
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack *nfct;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
struct sk_buff *nfct_reasm;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
@@ -2051,6 +2058,8 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
if (nfct)
atomic_inc(&nfct->use);
}
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
static inline void nf_conntrack_get_reasm(struct sk_buff *skb)
{
if (skb)
@@ -2079,6 +2088,8 @@ static inline void nf_reset(struct sk_buff *skb)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(skb->nfct_reasm);
skb->nfct_reasm = NULL;
#endif
@@ -2095,6 +2106,8 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
dst->nfct = src->nfct;
nf_conntrack_get(src->nfct);
dst->nfctinfo = src->nfctinfo;
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
dst->nfct_reasm = src->nfct_reasm;
nf_conntrack_get_reasm(src->nfct_reasm);
#endif
@@ -2108,6 +2121,8 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(dst->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(dst->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
index 1ee717e..a4c9936 100644
--- a/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_conntrack_ipv6.h
@@ -7,16 +7,6 @@ extern struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6;
extern struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6;
-extern int nf_ct_frag6_init(void);
-extern void nf_ct_frag6_cleanup(void);
-extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
-extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
- struct net_device *in,
- struct net_device *out,
- int (*okfn)(struct sk_buff *));
-
-struct inet_frags_ctl;
-
#include <linux/sysctl.h>
extern struct ctl_table nf_ct_ipv6_sysctl_table[];
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index 94dd54d..fd79c9a 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -3,4 +3,14 @@
extern void nf_defrag_ipv6_enable(void);
+extern int nf_ct_frag6_init(void);
+extern void nf_ct_frag6_cleanup(void);
+extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user);
+extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
+ struct net_device *in,
+ struct net_device *out,
+ int (*okfn)(struct sk_buff *));
+
+struct inet_frags_ctl;
+
#endif /* _NF_DEFRAG_IPV6_H */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 104f844..74ebf4b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -380,6 +380,8 @@ static void skb_release_head_state(struct sk_buff *skb)
}
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb->nfct);
+#endif
+#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
nf_conntrack_put_reasm(skb->nfct_reasm);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 99abfb5..97c5b21 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -19,13 +19,15 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
@@ -33,8 +35,10 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
{
u16 zone = NF_CT_DEFAULT_ZONE;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
if (skb->nfct)
zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
#ifdef CONFIG_BRIDGE_NETFILTER
if (skb->nf_bridge &&
@@ -56,9 +60,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
{
struct sk_buff *reasm;
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
/* Previously seen (loopback)? */
if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
+#endif
reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb));
/* queued */
--
1.7.2.3
^ permalink raw reply related
* [PATCH 28/79] IPVS: Backup, Adding structs for new sync format
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
New structs defined for version 1 of sync.
* ip_vs_sync_v4 Ipv4 base format struct
* ip_vs_sync_v6 Ipv6 base format struct
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_sync.c | 154 ++++++++++++++++++++++++++++++++++++---
1 files changed, 142 insertions(+), 12 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 47eed67..566482f 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -43,11 +43,13 @@
#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */
#define IP_VS_SYNC_PORT 8848 /* multicast port */
+#define SYNC_PROTO_VER 1 /* Protocol version in header */
/*
* IPVS sync connection entry
+ * Version 0, i.e. original version.
*/
-struct ip_vs_sync_conn {
+struct ip_vs_sync_conn_v0 {
__u8 reserved;
/* Protocol, addresses and port numbers */
@@ -71,40 +73,157 @@ struct ip_vs_sync_conn_options {
struct ip_vs_seq out_seq; /* outgoing seq. struct */
};
+/*
+ Sync Connection format (sync_conn)
+
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Type | Protocol | Ver. | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Flags |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | State | cport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | vport | dport |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | fwmark |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | timeout (in sec.) |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | ... |
+ | IP-Addresses (v4 or v6) |
+ | ... |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ Optional Parameters.
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param. Type | Param. Length | Param. data |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |
+ | ... |
+ | +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | | Param Type | Param. Length |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Param data |
+ | Last Param data should be padded for 32 bit alignment |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+
+/*
+ * Type 0, IPv4 sync connection format
+ */
+struct ip_vs_sync_v4 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ __be32 caddr; /* client address */
+ __be32 vaddr; /* virtual address */
+ __be32 daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+/*
+ * Type 2 messages IPv6
+ */
+struct ip_vs_sync_v6 {
+ __u8 type;
+ __u8 protocol; /* Which protocol (TCP/UDP) */
+ __be16 ver_size; /* Version msb 4 bits */
+ /* Flags and state transition */
+ __be32 flags; /* status flags */
+ __be16 state; /* state info */
+ /* Protocol, addresses and port numbers */
+ __be16 cport;
+ __be16 vport;
+ __be16 dport;
+ __be32 fwmark; /* Firewall mark from skb */
+ __be32 timeout; /* cp timeout */
+ struct in6_addr caddr; /* client address */
+ struct in6_addr vaddr; /* virtual address */
+ struct in6_addr daddr; /* destination address */
+ /* The sequence options start here */
+ /* PE data padded to 32bit alignment after seq. options */
+};
+
+union ip_vs_sync_conn {
+ struct ip_vs_sync_v4 v4;
+ struct ip_vs_sync_v6 v6;
+};
+
+/* Bits in Type field in above */
+#define STYPE_INET6 0
+#define STYPE_F_INET6 (1 << STYPE_INET6)
+
+#define SVER_SHIFT 12 /* Shift to get version */
+#define SVER_MASK 0x0fff /* Mask to strip version */
+
+#define IPVS_OPT_SEQ_DATA 1
+#define IPVS_OPT_PE_DATA 2
+#define IPVS_OPT_PE_NAME 3
+#define IPVS_OPT_PARAM 7
+
+#define IPVS_OPT_F_SEQ_DATA (1 << (IPVS_OPT_SEQ_DATA-1))
+#define IPVS_OPT_F_PE_DATA (1 << (IPVS_OPT_PE_DATA-1))
+#define IPVS_OPT_F_PE_NAME (1 << (IPVS_OPT_PE_NAME-1))
+#define IPVS_OPT_F_PARAM (1 << (IPVS_OPT_PARAM-1))
+
struct ip_vs_sync_thread_data {
struct socket *sock;
char *buf;
};
-#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
+/* Version 0 definition of packet sizes */
+#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn_v0))
#define FULL_CONN_SIZE \
-(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
+(sizeof(struct ip_vs_sync_conn_v0) + sizeof(struct ip_vs_sync_conn_options))
/*
- The master mulitcasts messages to the backup load balancers in the
- following format.
+ The master mulitcasts messages (Datagrams) to the backup load balancers
+ in the following format.
+
+ Version 1:
+ Note, first byte should be Zero, so ver 0 receivers will drop the packet.
0 1 2 3
0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
- | Count Conns | SyncID | Size |
+ | 0 | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | Version | Reserved, set to Zero |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (1) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| . |
- | . |
+ ~ . ~
| . |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
| |
| IPVS Sync Connection (n) |
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+ Version 0 Header
+ 0 1 2 3
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | Count Conns | SyncID | Size |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | IPVS Sync Connection (1) |
*/
#define SYNC_MESG_HEADER_LEN 4
#define MAX_CONNS_PER_SYNCBUFF 255 /* nr_conns in ip_vs_sync_mesg is 8 bit */
+/* Version 0 header */
struct ip_vs_sync_mesg {
__u8 nr_conns;
__u8 syncid;
@@ -113,6 +232,17 @@ struct ip_vs_sync_mesg {
/* ip_vs_sync_conn entries start here */
};
+/* Version 1 header */
+struct ip_vs_sync_mesg_v2 {
+ __u8 reserved; /* must be zero */
+ __u8 syncid;
+ __u16 size;
+ __u8 nr_conns;
+ __s8 version; /* SYNC_PROTO_VER */
+ __u16 spare;
+ /* ip_vs_sync_conn entries start here */
+};
+
/* the maximum length of sync (sending/receiving) message */
static int sync_send_mesg_maxlen;
static int sync_recv_mesg_maxlen;
@@ -239,7 +369,7 @@ get_curr_sync_buff(unsigned long time)
void ip_vs_sync_conn(const struct ip_vs_conn *cp)
{
struct ip_vs_sync_mesg *m;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_conn_v0 *s;
int len;
spin_lock(&curr_sb_lock);
@@ -254,7 +384,7 @@ void ip_vs_sync_conn(const struct ip_vs_conn *cp)
len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE :
SIMPLE_CONN_SIZE;
m = curr_sb->mesg;
- s = (struct ip_vs_sync_conn *)curr_sb->head;
+ s = (struct ip_vs_sync_conn_v0 *)curr_sb->head;
/* copy members */
s->protocol = cp->protocol;
@@ -306,7 +436,7 @@ ip_vs_conn_fill_param_sync(int af, int protocol,
static void ip_vs_process_message(char *buffer, const size_t buflen)
{
struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer;
- struct ip_vs_sync_conn *s;
+ struct ip_vs_sync_conn_v0 *s;
struct ip_vs_sync_conn_options *opt;
struct ip_vs_conn *cp;
struct ip_vs_protocol *pp;
@@ -343,7 +473,7 @@ static void ip_vs_process_message(char *buffer, const size_t buflen)
IP_VS_ERR_RL("bogus conn in sync message\n");
return;
}
- s = (struct ip_vs_sync_conn *) p;
+ s = (struct ip_vs_sync_conn_v0 *) p;
flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
flags &= ~IP_VS_CONN_F_HASHED;
if (flags & IP_VS_CONN_F_SEQ_MASK) {
@@ -849,7 +979,7 @@ int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
IP_VS_DBG(7, "%s(): pid %d\n", __func__, task_pid_nr(current));
IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
- sizeof(struct ip_vs_sync_conn));
+ sizeof(struct ip_vs_sync_conn_v0));
if (state == IP_VS_STATE_MASTER) {
if (sync_master_thread)
--
1.7.2.3
^ permalink raw reply related
* [PATCH 26/79] IPVS: skb defrag in L7 helpers
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
L7 helpers like sip needs skb defrag
since L7 data can be fragmented.
This patch requires "IPVS Break ports-2 into src_port and dst_port" patch
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_pe_sip.c | 3 +++
1 files changed, 3 insertions(+), 0 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index b8b4e96..0d83bc0 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -71,6 +71,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
+ int retc;
ip_vs_fill_iphdr(p->af, skb_network_header(skb), &iph);
@@ -83,6 +84,8 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
if (dataoff >= skb->len)
return -EINVAL;
+ if ((retc=skb_linearize(skb)) < 0)
+ return retc;
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
--
1.7.2.3
^ permalink raw reply related
* [PATCH 25/79] IPVS: Split ports[2] into src_port and dst_port
From: kaber @ 2011-01-19 19:14 UTC (permalink / raw)
To: davem; +Cc: netfilter-devel, netdev
In-Reply-To: <1295464519-21763-1-git-send-email-kaber@trash.net>
From: Hans Schillstrom <hans.schillstrom@ericsson.com>
Avoid sending invalid pointer due to skb_linearize() call.
This patch prepares for next patch where skb_linearize is a part.
In ip_vs_sched_persist() params the ports ptr will be replaced by
src and dst port.
Signed-off-by: Hans Schillstrom <hans.schillstrom@ericsson.com>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
net/netfilter/ipvs/ip_vs_core.c | 21 +++++++++++----------
1 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index e2bb3cd..9acdd79 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -200,7 +200,7 @@ ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc,
static struct ip_vs_conn *
ip_vs_sched_persist(struct ip_vs_service *svc,
struct sk_buff *skb,
- __be16 ports[2])
+ __be16 src_port, __be16 dst_port)
{
struct ip_vs_conn *cp = NULL;
struct ip_vs_iphdr iph;
@@ -224,8 +224,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
IP_VS_DBG_BUF(6, "p-schedule: src %s:%u dest %s:%u "
"mnet %s\n",
- IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(ports[0]),
- IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(ports[1]),
+ IP_VS_DBG_ADDR(svc->af, &iph.saddr), ntohs(src_port),
+ IP_VS_DBG_ADDR(svc->af, &iph.daddr), ntohs(dst_port),
IP_VS_DBG_ADDR(svc->af, &snet));
/*
@@ -247,14 +247,14 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };
__be16 vport = 0;
- if (ports[1] == svc->port) {
+ if (dst_port == svc->port) {
/* non-FTP template:
* <protocol, caddr, 0, vaddr, vport, daddr, dport>
* FTP template:
* <protocol, caddr, 0, vaddr, 0, daddr, 0>
*/
if (svc->port != FTPPORT)
- vport = ports[1];
+ vport = dst_port;
} else {
/* Note: persistent fwmark-based services and
* persistent port zero service are handled here.
@@ -285,7 +285,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
return NULL;
}
- if (ports[1] == svc->port && svc->port != FTPPORT)
+ if (dst_port == svc->port && svc->port != FTPPORT)
dport = dest->port;
/* Create a template
@@ -306,7 +306,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
kfree(param.pe_data);
}
- dport = ports[1];
+ dport = dst_port;
if (dport == svc->port && dest->port)
dport = dest->port;
@@ -317,8 +317,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
/*
* Create a new connection according to the template
*/
- ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0],
- &iph.daddr, ports[1], ¶m);
+ ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, src_port,
+ &iph.daddr, dst_port, ¶m);
+
cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest, skb->mark);
if (cp == NULL) {
ip_vs_conn_put(ct);
@@ -388,7 +389,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
*/
if (svc->flags & IP_VS_SVC_F_PERSISTENT) {
*ignored = 0;
- return ip_vs_sched_persist(svc, skb, pptr);
+ return ip_vs_sched_persist(svc, skb, pptr[0], pptr[1]);
}
/*
--
1.7.2.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox