* [PATCH net-next 01/12] openvswitch: use const in some local vars and casts
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 02/12] openvswitch: avoid warnings in vport_from_priv Jesse Gross
` (11 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Daniele Di Proietto
From: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
In few functions, const formal parameters are assigned or cast to
non-const.
These changes suppress warnings if compiled with -Wcast-qual.
Signed-off-by: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow_netlink.c | 6 ++++--
net/openvswitch/flow_table.c | 16 +++++++++-------
2 files changed, 13 insertions(+), 9 deletions(-)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 4d000ac..5517bd6 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -628,8 +628,10 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
if (is_mask && exact_5tuple && *exact_5tuple) {
if (ipv6_key->ipv6_proto != 0xff ||
- !is_all_set((u8 *)ipv6_key->ipv6_src, sizeof(match->key->ipv6.addr.src)) ||
- !is_all_set((u8 *)ipv6_key->ipv6_dst, sizeof(match->key->ipv6.addr.dst)))
+ !is_all_set((const u8 *)ipv6_key->ipv6_src,
+ sizeof(match->key->ipv6.addr.src)) ||
+ !is_all_set((const u8 *)ipv6_key->ipv6_dst,
+ sizeof(match->key->ipv6.addr.dst)))
*exact_5tuple = false;
}
}
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3c268b3..1ba1e0b 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -57,8 +57,10 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
const struct sw_flow_mask *mask)
{
- const long *m = (long *)((u8 *)&mask->key + mask->range.start);
- const long *s = (long *)((u8 *)src + mask->range.start);
+ const long *m = (const long *)((const u8 *)&mask->key +
+ mask->range.start);
+ const long *s = (const long *)((const u8 *)src +
+ mask->range.start);
long *d = (long *)((u8 *)dst + mask->range.start);
int i;
@@ -375,7 +377,7 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
static u32 flow_hash(const struct sw_flow_key *key, int key_start,
int key_end)
{
- u32 *hash_key = (u32 *)((u8 *)key + key_start);
+ const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
int hash_u32s = (key_end - key_start) >> 2;
/* Make sure number of hash bytes are multiple of u32. */
@@ -397,8 +399,8 @@ static bool cmp_key(const struct sw_flow_key *key1,
const struct sw_flow_key *key2,
int key_start, int key_end)
{
- const long *cp1 = (long *)((u8 *)key1 + key_start);
- const long *cp2 = (long *)((u8 *)key2 + key_start);
+ const long *cp1 = (const long *)((const u8 *)key1 + key_start);
+ const long *cp2 = (const long *)((const u8 *)key2 + key_start);
long diffs = 0;
int i;
@@ -513,8 +515,8 @@ static struct sw_flow_mask *mask_alloc(void)
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
- u8 *a_ = (u8 *)&a->key + a->range.start;
- u8 *b_ = (u8 *)&b->key + b->range.start;
+ const u8 *a_ = (const u8 *)&a->key + a->range.start;
+ const u8 *b_ = (const u8 *)&b->key + b->range.start;
return (a->range.end == b->range.end)
&& (a->range.start == b->range.start)
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 02/12] openvswitch: avoid warnings in vport_from_priv
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
2014-05-16 21:07 ` [PATCH net-next 01/12] openvswitch: use const in some local vars and casts Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 03/12] openvswitch: avoid cast-qual warning in vport_priv Jesse Gross
` (10 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Daniele Di Proietto
From: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
This change, firstly, avoids declaring the formal parameter const,
since it is treated as non const. (to avoid -Wcast-qual)
Secondly, it cast the pointer from void* to u8*, since it is used
in arithmetic (to avoid -Wpointer-arith)
Signed-off-by: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/vport.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index d7e50a1..3e12940 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -185,9 +185,9 @@ static inline void *vport_priv(const struct vport *vport)
* the result of a hash table lookup. @priv must point to the start of the
* private data area.
*/
-static inline struct vport *vport_from_priv(const void *priv)
+static inline struct vport *vport_from_priv(void *priv)
{
- return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+ return (struct vport *)((u8 *)priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
}
void ovs_vport_receive(struct vport *, struct sk_buff *,
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 03/12] openvswitch: avoid cast-qual warning in vport_priv
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
2014-05-16 21:07 ` [PATCH net-next 01/12] openvswitch: use const in some local vars and casts Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 02/12] openvswitch: avoid warnings in vport_from_priv Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 04/12] openvswitch: Added (unsigned long long) cast in printf Jesse Gross
` (9 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Daniele Di Proietto
From: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
This function must cast a const value to a non const value.
By adding an uintptr_t cast the warning is suppressed.
To avoid the cast (proper solution) several function signatures
must be changed.
Signed-off-by: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/vport.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index 3e12940..8d721e6 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -172,7 +172,7 @@ void ovs_vport_deferred_free(struct vport *vport);
*/
static inline void *vport_priv(const struct vport *vport)
{
- return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+ return (u8 *)(uintptr_t)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
}
/**
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 04/12] openvswitch: Added (unsigned long long) cast in printf
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (2 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 03/12] openvswitch: avoid cast-qual warning in vport_priv Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 05/12] openvswitch: Use net_ratelimit in OVS_NLERR Jesse Gross
` (8 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Daniele Di Proietto
From: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
This is necessary, since u64 is not unsigned long long
in all architectures: u64 could be also uint64_t.
Signed-off-by: Daniele Di Proietto <daniele.di.proietto-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow_netlink.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5517bd6..1b22ad2 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -216,14 +216,14 @@ static bool match_validate(const struct sw_flow_match *match,
if ((key_attrs & key_expected) != key_expected) {
/* Key attributes check failed. */
OVS_NLERR("Missing expected key attributes (key_attrs=%llx, expected=%llx).\n",
- key_attrs, key_expected);
+ (unsigned long long)key_attrs, (unsigned long long)key_expected);
return false;
}
if ((mask_attrs & mask_allowed) != mask_attrs) {
/* Mask attributes check failed. */
OVS_NLERR("Contain more than allowed mask fields (mask_attrs=%llx, mask_allowed=%llx).\n",
- mask_attrs, mask_allowed);
+ (unsigned long long)mask_attrs, (unsigned long long)mask_allowed);
return false;
}
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 05/12] openvswitch: Use net_ratelimit in OVS_NLERR
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (3 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 04/12] openvswitch: Added (unsigned long long) cast in printf Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 06/12] openvswitch: flow_netlink: Use pr_fmt to OVS_NLERR output Jesse Gross
` (7 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Joe Perches
From: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
Each use of pr_<level>_once has a per-site flag.
Some of the OVS_NLERR messages look as if seeing them
multiple times could be useful, so use net_ratelimit()
instead of pr_info_once.
Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/datapath.h | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 0531738..7ede507 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -194,7 +194,9 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
void ovs_dp_notify_wq(struct work_struct *work);
-#define OVS_NLERR(fmt, ...) \
- pr_info_once("netlink: " fmt, ##__VA_ARGS__)
-
+#define OVS_NLERR(fmt, ...) \
+do { \
+ if (net_ratelimit()) \
+ pr_info("netlink: " fmt, ##__VA_ARGS__); \
+} while (0)
#endif /* datapath.h */
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 06/12] openvswitch: flow_netlink: Use pr_fmt to OVS_NLERR output
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (4 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 05/12] openvswitch: Use net_ratelimit in OVS_NLERR Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 07/12] openvswitch: Use ether_addr_copy Jesse Gross
` (6 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Joe Perches
From: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
Add "openvswitch: " prefix to OVS_NLERR output
to match the other OVS_NLERR output of datapath.c
Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow_netlink.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 1b22ad2..7bd09b7 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -16,6 +16,8 @@
* 02110-1301, USA
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include "flow.h"
#include "datapath.h"
#include <linux/uaccess.h>
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 07/12] openvswitch: Use ether_addr_copy
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (5 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 06/12] openvswitch: flow_netlink: Use pr_fmt to OVS_NLERR output Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 08/12] openvswitch: Remove 5-tuple optimization Jesse Gross
` (5 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Joe Perches
From: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
It's slightly smaller/faster for some architectures.
Signed-off-by: Joe Perches <joe-6d6DIl74uiNBDgjK7y7TUQ@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/actions.c | 4 ++--
net/openvswitch/flow.c | 16 ++++++++--------
net/openvswitch/flow_netlink.c | 12 ++++++------
3 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 2c77e7b..c36856a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -134,8 +134,8 @@ static int set_eth_addr(struct sk_buff *skb,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
- memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+ ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
+ ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 2998989..332aa01 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -372,14 +372,14 @@ static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
goto invalid;
- memcpy(key->ipv6.nd.sll,
- &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ ether_addr_copy(key->ipv6.nd.sll,
+ &nd->opt[offset+sizeof(*nd_opt)]);
} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
&& opt_len == 8) {
if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
goto invalid;
- memcpy(key->ipv6.nd.tll,
- &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+ ether_addr_copy(key->ipv6.nd.tll,
+ &nd->opt[offset+sizeof(*nd_opt)]);
}
icmp_len -= opt_len;
@@ -439,8 +439,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
* header in the linear data area.
*/
eth = eth_hdr(skb);
- memcpy(key->eth.src, eth->h_source, ETH_ALEN);
- memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+ ether_addr_copy(key->eth.src, eth->h_source);
+ ether_addr_copy(key->eth.dst, eth->h_dest);
__skb_pull(skb, 2 * ETH_ALEN);
/* We are going to push all headers that we pull, so no need to
@@ -538,8 +538,8 @@ int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key)
key->ip.proto = ntohs(arp->ar_op);
memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
- memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
- memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+ ether_addr_copy(key->ipv4.arp.sha, arp->ar_sha);
+ ether_addr_copy(key->ipv4.arp.tha, arp->ar_tha);
}
} else if (key->eth.type == htons(ETH_P_IPV6)) {
int nh_len; /* IPv6 Header + Extensions */
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 7bd09b7..5511ad1 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -986,8 +986,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure;
eth_key = nla_data(nla);
- memcpy(eth_key->eth_src, output->eth.src, ETH_ALEN);
- memcpy(eth_key->eth_dst, output->eth.dst, ETH_ALEN);
+ ether_addr_copy(eth_key->eth_src, output->eth.src);
+ ether_addr_copy(eth_key->eth_dst, output->eth.dst);
if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
__be16 eth_type;
@@ -1059,8 +1059,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
arp_key->arp_sip = output->ipv4.addr.src;
arp_key->arp_tip = output->ipv4.addr.dst;
arp_key->arp_op = htons(output->ip.proto);
- memcpy(arp_key->arp_sha, output->ipv4.arp.sha, ETH_ALEN);
- memcpy(arp_key->arp_tha, output->ipv4.arp.tha, ETH_ALEN);
+ ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
+ ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
}
if ((swkey->eth.type == htons(ETH_P_IP) ||
@@ -1147,8 +1147,8 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
nd_key = nla_data(nla);
memcpy(nd_key->nd_target, &output->ipv6.nd.target,
sizeof(nd_key->nd_target));
- memcpy(nd_key->nd_sll, output->ipv6.nd.sll, ETH_ALEN);
- memcpy(nd_key->nd_tll, output->ipv6.nd.tll, ETH_ALEN);
+ ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
+ ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
}
}
}
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 08/12] openvswitch: Remove 5-tuple optimization.
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (6 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 07/12] openvswitch: Use ether_addr_copy Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 09/12] openvswitch: Per NUMA node flow stats Jesse Gross
` (4 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
From: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
The 5-tuple optimization becomes unnecessary with a later per-NUMA
node stats patch. Remove it first to make the changes easier to
grasp.
Signed-off-by: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/datapath.c | 11 ++++----
net/openvswitch/flow.c | 32 +++++++++--------------
net/openvswitch/flow.h | 10 +-------
net/openvswitch/flow_netlink.c | 58 +++---------------------------------------
net/openvswitch/flow_netlink.h | 1 -
net/openvswitch/flow_table.c | 31 +++++++---------------
net/openvswitch/flow_table.h | 2 +-
7 files changed, 32 insertions(+), 113 deletions(-)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index a3276e3..8867d7e 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -524,7 +524,7 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
packet->protocol = htons(ETH_P_802_2);
/* Build an sw_flow for sending this packet. */
- flow = ovs_flow_alloc(false);
+ flow = ovs_flow_alloc();
err = PTR_ERR(flow);
if (IS_ERR(flow))
goto err_kfree_skb;
@@ -782,7 +782,6 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
struct datapath *dp;
struct sw_flow_actions *acts = NULL;
struct sw_flow_match match;
- bool exact_5tuple;
int error;
/* Extract key. */
@@ -791,7 +790,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto error;
ovs_match_init(&match, &key, &mask);
- error = ovs_nla_get_match(&match, &exact_5tuple,
+ error = ovs_nla_get_match(&match,
a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK]);
if (error)
goto error;
@@ -830,7 +829,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
/* Allocate flow. */
- flow = ovs_flow_alloc(!exact_5tuple);
+ flow = ovs_flow_alloc();
if (IS_ERR(flow)) {
error = PTR_ERR(flow);
goto err_unlock_ovs;
@@ -914,7 +913,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
return err;
@@ -968,7 +967,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
}
ovs_match_init(&match, &key, NULL);
- err = ovs_nla_get_match(&match, NULL, a[OVS_FLOW_ATTR_KEY], NULL);
+ err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL);
if (err)
goto unlock;
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 332aa01..aad7a8d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -66,10 +66,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
struct flow_stats *stats;
__be16 tcp_flags = 0;
- if (!flow->stats.is_percpu)
- stats = flow->stats.stat;
- else
- stats = this_cpu_ptr(flow->stats.cpu_stats);
+ stats = this_cpu_ptr(flow->stats);
if ((flow->key.eth.type == htons(ETH_P_IP) ||
flow->key.eth.type == htons(ETH_P_IPV6)) &&
@@ -110,16 +107,14 @@ void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
memset(ovs_stats, 0, sizeof(*ovs_stats));
local_bh_disable();
- if (!flow->stats.is_percpu) {
- stats_read(flow->stats.stat, ovs_stats, used, tcp_flags);
- } else {
- for_each_possible_cpu(cpu) {
- struct flow_stats *stats;
-
- stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- stats_read(stats, ovs_stats, used, tcp_flags);
- }
+
+ for_each_possible_cpu(cpu) {
+ struct flow_stats *stats;
+
+ stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
+ stats_read(stats, ovs_stats, used, tcp_flags);
}
+
local_bh_enable();
}
@@ -138,13 +133,10 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
local_bh_disable();
- if (!flow->stats.is_percpu) {
- stats_reset(flow->stats.stat);
- } else {
- for_each_possible_cpu(cpu) {
- stats_reset(per_cpu_ptr(flow->stats.cpu_stats, cpu));
- }
- }
+
+ for_each_possible_cpu(cpu)
+ stats_reset(per_cpu_ptr(flow->stats, cpu));
+
local_bh_enable();
}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 2d770e2..9c0dd8a 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -155,14 +155,6 @@ struct flow_stats {
__be16 tcp_flags; /* Union of seen TCP flags. */
};
-struct sw_flow_stats {
- bool is_percpu;
- union {
- struct flow_stats *stat;
- struct flow_stats __percpu *cpu_stats;
- };
-};
-
struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
@@ -172,7 +164,7 @@ struct sw_flow {
struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct sw_flow_stats stats;
+ struct flow_stats __percpu *stats;
};
struct arp_eth_header {
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 5511ad1..84caa99 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -268,20 +268,6 @@ static bool is_all_zero(const u8 *fp, size_t size)
return true;
}
-static bool is_all_set(const u8 *fp, size_t size)
-{
- int i;
-
- if (!fp)
- return false;
-
- for (i = 0; i < size; i++)
- if (fp[i] != 0xff)
- return false;
-
- return true;
-}
-
static int __parse_flow_nlattrs(const struct nlattr *attr,
const struct nlattr *a[],
u64 *attrsp, bool nz)
@@ -503,9 +489,8 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs,
return 0;
}
-static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple,
- u64 attrs, const struct nlattr **a,
- bool is_mask)
+static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
+ const struct nlattr **a, bool is_mask)
{
int err;
u64 orig_attrs = attrs;
@@ -562,11 +547,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
}
- if (is_mask && exact_5tuple) {
- if (match->mask->key.eth.type != htons(0xffff))
- *exact_5tuple = false;
- }
-
if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
const struct ovs_key_ipv4 *ipv4_key;
@@ -589,13 +569,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
ipv4_key->ipv4_dst, is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
-
- if (is_mask && exact_5tuple && *exact_5tuple) {
- if (ipv4_key->ipv4_proto != 0xff ||
- ipv4_key->ipv4_src != htonl(0xffffffff) ||
- ipv4_key->ipv4_dst != htonl(0xffffffff))
- *exact_5tuple = false;
- }
}
if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
@@ -627,15 +600,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
is_mask);
attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
-
- if (is_mask && exact_5tuple && *exact_5tuple) {
- if (ipv6_key->ipv6_proto != 0xff ||
- !is_all_set((const u8 *)ipv6_key->ipv6_src,
- sizeof(match->key->ipv6.addr.src)) ||
- !is_all_set((const u8 *)ipv6_key->ipv6_dst,
- sizeof(match->key->ipv6.addr.dst)))
- *exact_5tuple = false;
- }
}
if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
@@ -678,11 +642,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
tcp_key->tcp_dst, is_mask);
}
attrs &= ~(1 << OVS_KEY_ATTR_TCP);
-
- if (is_mask && exact_5tuple && *exact_5tuple &&
- (tcp_key->tcp_src != htons(0xffff) ||
- tcp_key->tcp_dst != htons(0xffff)))
- *exact_5tuple = false;
}
if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
@@ -714,11 +673,6 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, bool *exact_5tuple
udp_key->udp_dst, is_mask);
}
attrs &= ~(1 << OVS_KEY_ATTR_UDP);
-
- if (is_mask && exact_5tuple && *exact_5tuple &&
- (udp_key->udp_src != htons(0xffff) ||
- udp_key->udp_dst != htons(0xffff)))
- *exact_5tuple = false;
}
if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
@@ -804,7 +758,6 @@ static void sw_flow_mask_set(struct sw_flow_mask *mask,
* attribute specifies the mask field of the wildcarded flow.
*/
int ovs_nla_get_match(struct sw_flow_match *match,
- bool *exact_5tuple,
const struct nlattr *key,
const struct nlattr *mask)
{
@@ -852,13 +805,10 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, NULL, key_attrs, a, false);
+ err = ovs_key_from_nlattrs(match, key_attrs, a, false);
if (err)
return err;
- if (exact_5tuple)
- *exact_5tuple = true;
-
if (mask) {
err = parse_flow_mask_nlattrs(mask, a, &mask_attrs);
if (err)
@@ -896,7 +846,7 @@ int ovs_nla_get_match(struct sw_flow_match *match,
}
}
- err = ovs_key_from_nlattrs(match, exact_5tuple, mask_attrs, a, true);
+ err = ovs_key_from_nlattrs(match, mask_attrs, a, true);
if (err)
return err;
} else {
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index b31fbe2..4401510 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -45,7 +45,6 @@ int ovs_nla_put_flow(const struct sw_flow_key *,
int ovs_nla_get_flow_metadata(struct sw_flow *flow,
const struct nlattr *attr);
int ovs_nla_get_match(struct sw_flow_match *match,
- bool *exact_5tuple,
const struct nlattr *,
const struct nlattr *);
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 1ba1e0b..aa92da2 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -72,7 +72,7 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
*d++ = *s++ & *m++;
}
-struct sw_flow *ovs_flow_alloc(bool percpu_stats)
+struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
int cpu;
@@ -84,25 +84,15 @@ struct sw_flow *ovs_flow_alloc(bool percpu_stats)
flow->sf_acts = NULL;
flow->mask = NULL;
- flow->stats.is_percpu = percpu_stats;
+ flow->stats = alloc_percpu(struct flow_stats);
+ if (!flow->stats)
+ goto err;
- if (!percpu_stats) {
- flow->stats.stat = kzalloc(sizeof(*flow->stats.stat), GFP_KERNEL);
- if (!flow->stats.stat)
- goto err;
+ for_each_possible_cpu(cpu) {
+ struct flow_stats *cpu_stats;
- spin_lock_init(&flow->stats.stat->lock);
- } else {
- flow->stats.cpu_stats = alloc_percpu(struct flow_stats);
- if (!flow->stats.cpu_stats)
- goto err;
-
- for_each_possible_cpu(cpu) {
- struct flow_stats *cpu_stats;
-
- cpu_stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- spin_lock_init(&cpu_stats->lock);
- }
+ cpu_stats = per_cpu_ptr(flow->stats, cpu);
+ spin_lock_init(&cpu_stats->lock);
}
return flow;
err:
@@ -141,10 +131,7 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow)
{
kfree((struct sf_flow_acts __force *)flow->sf_acts);
- if (flow->stats.is_percpu)
- free_percpu(flow->stats.cpu_stats);
- else
- kfree(flow->stats.stat);
+ free_percpu(flow->stats);
kmem_cache_free(flow_cache, flow);
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index baaeb10..c26c59a 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -55,7 +55,7 @@ struct flow_table {
int ovs_flow_init(void);
void ovs_flow_exit(void);
-struct sw_flow *ovs_flow_alloc(bool percpu_stats);
+struct sw_flow *ovs_flow_alloc(void);
void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 09/12] openvswitch: Per NUMA node flow stats.
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (7 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 08/12] openvswitch: Remove 5-tuple optimization Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 10/12] openvswitch: Fix output of SCTP mask Jesse Gross
` (3 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
From: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Keep kernel flow stats for each NUMA node rather than each (logical)
CPU. This avoids using the per-CPU allocator and removes most of the
kernel-side OVS locking overhead otherwise on the top of perf reports
and allows OVS to scale better with higher number of threads.
With 9 handlers and 4 revalidators netperf TCP_CRR test flow setup
rate doubles on a server with two hyper-threaded physical CPUs (16
logical cores each) compared to the current OVS master. Tested with
non-trivial flow table with a TCP port match rule forcing all new
connections with unique port numbers to OVS userspace. The IP
addresses are still wildcarded, so the kernel flows are not considered
as exact match 5-tuple flows. This type of flows can be expected to
appear in large numbers as the result of more effective wildcarding
made possible by improvements in OVS userspace flow classifier.
Perf results for this test (master):
Events: 305K cycles
+ 8.43% ovs-vswitchd [kernel.kallsyms] [k] mutex_spin_on_owner
+ 5.64% ovs-vswitchd [kernel.kallsyms] [k] __ticket_spin_lock
+ 4.75% ovs-vswitchd ovs-vswitchd [.] find_match_wc
+ 3.32% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_lock
+ 2.61% ovs-vswitchd [kernel.kallsyms] [k] pcpu_alloc_area
+ 2.19% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask_range
+ 2.03% swapper [kernel.kallsyms] [k] intel_idle
+ 1.84% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_unlock
+ 1.64% ovs-vswitchd ovs-vswitchd [.] classifier_lookup
+ 1.58% ovs-vswitchd libc-2.15.so [.] 0x7f4e6
+ 1.07% ovs-vswitchd [kernel.kallsyms] [k] memset
+ 1.03% netperf [kernel.kallsyms] [k] __ticket_spin_lock
+ 0.92% swapper [kernel.kallsyms] [k] __ticket_spin_lock
...
And after this patch:
Events: 356K cycles
+ 6.85% ovs-vswitchd ovs-vswitchd [.] find_match_wc
+ 4.63% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_lock
+ 3.06% ovs-vswitchd [kernel.kallsyms] [k] __ticket_spin_lock
+ 2.81% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask_range
+ 2.51% ovs-vswitchd libpthread-2.15.so [.] pthread_mutex_unlock
+ 2.27% ovs-vswitchd ovs-vswitchd [.] classifier_lookup
+ 1.84% ovs-vswitchd libc-2.15.so [.] 0x15d30f
+ 1.74% ovs-vswitchd [kernel.kallsyms] [k] mutex_spin_on_owner
+ 1.47% swapper [kernel.kallsyms] [k] intel_idle
+ 1.34% ovs-vswitchd ovs-vswitchd [.] flow_hash_in_minimask
+ 1.33% ovs-vswitchd ovs-vswitchd [.] rule_actions_unref
+ 1.16% ovs-vswitchd ovs-vswitchd [.] hindex_node_with_hash
+ 1.16% ovs-vswitchd ovs-vswitchd [.] do_xlate_actions
+ 1.09% ovs-vswitchd ovs-vswitchd [.] ofproto_rule_ref
+ 1.01% netperf [kernel.kallsyms] [k] __ticket_spin_lock
...
There is a small increase in kernel spinlock overhead due to the same
spinlock being shared between multiple cores of the same physical CPU,
but that is barely visible in the netperf TCP_CRR test performance
(maybe ~1% performance drop, hard to tell exactly due to variance in
the test results), when testing for kernel module throughput (with no
userspace activity, handful of kernel flows).
On flow setup, a single stats instance is allocated (for the NUMA node
0). As CPUs from multiple NUMA nodes start updating stats, new
NUMA-node specific stats instances are allocated. This allocation on
the packet processing code path is made to never block or look for
emergency memory pools, minimizing the allocation latency. If the
allocation fails, the existing preallocated stats instance is used.
Also, if only CPUs from one NUMA-node are updating the preallocated
stats instance, no additional stats instances are allocated. This
eliminates the need to pre-allocate stats instances that will not be
used, also relieving the stats reader from the burden of reading stats
that are never used.
Signed-off-by: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Acked-by: Pravin B Shelar <pshelar-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow.c | 119 ++++++++++++++++++++++++++++---------------
net/openvswitch/flow.h | 10 +++-
net/openvswitch/flow_table.c | 46 +++++++++++++----
net/openvswitch/flow_table.h | 2 +
4 files changed, 122 insertions(+), 55 deletions(-)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index aad7a8d..432f04d 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -65,8 +65,9 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
{
struct flow_stats *stats;
__be16 tcp_flags = 0;
+ int node = numa_node_id();
- stats = this_cpu_ptr(flow->stats);
+ stats = rcu_dereference(flow->stats[node]);
if ((flow->key.eth.type == htons(ETH_P_IP) ||
flow->key.eth.type == htons(ETH_P_IPV6)) &&
@@ -76,68 +77,102 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
}
- spin_lock(&stats->lock);
+ /* Check if already have node-specific stats. */
+ if (likely(stats)) {
+ spin_lock(&stats->lock);
+ /* Mark if we write on the pre-allocated stats. */
+ if (node == 0 && unlikely(flow->stats_last_writer != node))
+ flow->stats_last_writer = node;
+ } else {
+ stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */
+ spin_lock(&stats->lock);
+
+ /* If the current NUMA-node is the only writer on the
+ * pre-allocated stats keep using them.
+ */
+ if (unlikely(flow->stats_last_writer != node)) {
+ /* A previous locker may have already allocated the
+ * stats, so we need to check again. If node-specific
+ * stats were already allocated, we update the pre-
+ * allocated stats as we have already locked them.
+ */
+ if (likely(flow->stats_last_writer != NUMA_NO_NODE)
+ && likely(!rcu_dereference(flow->stats[node]))) {
+ /* Try to allocate node-specific stats. */
+ struct flow_stats *new_stats;
+
+ new_stats =
+ kmem_cache_alloc_node(flow_stats_cache,
+ GFP_THISNODE |
+ __GFP_NOMEMALLOC,
+ node);
+ if (likely(new_stats)) {
+ new_stats->used = jiffies;
+ new_stats->packet_count = 1;
+ new_stats->byte_count = skb->len;
+ new_stats->tcp_flags = tcp_flags;
+ spin_lock_init(&new_stats->lock);
+
+ rcu_assign_pointer(flow->stats[node],
+ new_stats);
+ goto unlock;
+ }
+ }
+ flow->stats_last_writer = node;
+ }
+ }
+
stats->used = jiffies;
stats->packet_count++;
stats->byte_count += skb->len;
stats->tcp_flags |= tcp_flags;
- spin_unlock(&stats->lock);
-}
-
-static void stats_read(struct flow_stats *stats,
- struct ovs_flow_stats *ovs_stats,
- unsigned long *used, __be16 *tcp_flags)
-{
- spin_lock(&stats->lock);
- if (!*used || time_after(stats->used, *used))
- *used = stats->used;
- *tcp_flags |= stats->tcp_flags;
- ovs_stats->n_packets += stats->packet_count;
- ovs_stats->n_bytes += stats->byte_count;
+unlock:
spin_unlock(&stats->lock);
}
void ovs_flow_stats_get(struct sw_flow *flow, struct ovs_flow_stats *ovs_stats,
unsigned long *used, __be16 *tcp_flags)
{
- int cpu;
+ int node;
*used = 0;
*tcp_flags = 0;
memset(ovs_stats, 0, sizeof(*ovs_stats));
- local_bh_disable();
-
- for_each_possible_cpu(cpu) {
- struct flow_stats *stats;
+ for_each_node(node) {
+ struct flow_stats *stats = rcu_dereference(flow->stats[node]);
- stats = per_cpu_ptr(flow->stats.cpu_stats, cpu);
- stats_read(stats, ovs_stats, used, tcp_flags);
+ if (stats) {
+ /* Local CPU may write on non-local stats, so we must
+ * block bottom-halves here.
+ */
+ spin_lock_bh(&stats->lock);
+ if (!*used || time_after(stats->used, *used))
+ *used = stats->used;
+ *tcp_flags |= stats->tcp_flags;
+ ovs_stats->n_packets += stats->packet_count;
+ ovs_stats->n_bytes += stats->byte_count;
+ spin_unlock_bh(&stats->lock);
+ }
}
-
- local_bh_enable();
-}
-
-static void stats_reset(struct flow_stats *stats)
-{
- spin_lock(&stats->lock);
- stats->used = 0;
- stats->packet_count = 0;
- stats->byte_count = 0;
- stats->tcp_flags = 0;
- spin_unlock(&stats->lock);
}
void ovs_flow_stats_clear(struct sw_flow *flow)
{
- int cpu;
-
- local_bh_disable();
-
- for_each_possible_cpu(cpu)
- stats_reset(per_cpu_ptr(flow->stats, cpu));
-
- local_bh_enable();
+ int node;
+
+ for_each_node(node) {
+ struct flow_stats *stats = rcu_dereference(flow->stats[node]);
+
+ if (stats) {
+ spin_lock_bh(&stats->lock);
+ stats->used = 0;
+ stats->packet_count = 0;
+ stats->byte_count = 0;
+ stats->tcp_flags = 0;
+ spin_unlock_bh(&stats->lock);
+ }
+ }
}
static int check_header(struct sk_buff *skb, int len)
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 9c0dd8a..ddcebc5 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -159,12 +159,18 @@ struct sw_flow {
struct rcu_head rcu;
struct hlist_node hash_node[2];
u32 hash;
-
+ int stats_last_writer; /* NUMA-node id of the last writer on
+ * 'stats[0]'.
+ */
struct sw_flow_key key;
struct sw_flow_key unmasked_key;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct flow_stats __percpu *stats;
+ struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one
+ * is allocated at flow creation time,
+ * the rest are allocated on demand
+ * while holding the 'stats[0].lock'.
+ */
};
struct arp_eth_header {
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index aa92da2..d8ef37b 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -48,6 +48,7 @@
#define REHASH_INTERVAL (10 * 60 * HZ)
static struct kmem_cache *flow_cache;
+struct kmem_cache *flow_stats_cache __read_mostly;
static u16 range_n_bytes(const struct sw_flow_key_range *range)
{
@@ -75,7 +76,8 @@ void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- int cpu;
+ struct flow_stats *stats;
+ int node;
flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
if (!flow)
@@ -83,17 +85,22 @@ struct sw_flow *ovs_flow_alloc(void)
flow->sf_acts = NULL;
flow->mask = NULL;
+ flow->stats_last_writer = NUMA_NO_NODE;
- flow->stats = alloc_percpu(struct flow_stats);
- if (!flow->stats)
+ /* Initialize the default stat node. */
+ stats = kmem_cache_alloc_node(flow_stats_cache,
+ GFP_KERNEL | __GFP_ZERO, 0);
+ if (!stats)
goto err;
- for_each_possible_cpu(cpu) {
- struct flow_stats *cpu_stats;
+ spin_lock_init(&stats->lock);
+
+ RCU_INIT_POINTER(flow->stats[0], stats);
+
+ for_each_node(node)
+ if (node != 0)
+ RCU_INIT_POINTER(flow->stats[node], NULL);
- cpu_stats = per_cpu_ptr(flow->stats, cpu);
- spin_lock_init(&cpu_stats->lock);
- }
return flow;
err:
kmem_cache_free(flow_cache, flow);
@@ -130,8 +137,13 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets)
static void flow_free(struct sw_flow *flow)
{
+ int node;
+
kfree((struct sf_flow_acts __force *)flow->sf_acts);
- free_percpu(flow->stats);
+ for_each_node(node)
+ if (flow->stats[node])
+ kmem_cache_free(flow_stats_cache,
+ (struct flow_stats __force *)flow->stats[node]);
kmem_cache_free(flow_cache, flow);
}
@@ -586,16 +598,28 @@ int ovs_flow_init(void)
BUILD_BUG_ON(__alignof__(struct sw_flow_key) % __alignof__(long));
BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long));
- flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
- 0, NULL);
+ flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ + (num_possible_nodes()
+ * sizeof(struct flow_stats *)),
+ 0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
+ flow_stats_cache
+ = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ 0, SLAB_HWCACHE_ALIGN, NULL);
+ if (flow_stats_cache == NULL) {
+ kmem_cache_destroy(flow_cache);
+ flow_cache = NULL;
+ return -ENOMEM;
+ }
+
return 0;
}
/* Uninitializes the flow module. */
void ovs_flow_exit(void)
{
+ kmem_cache_destroy(flow_stats_cache);
kmem_cache_destroy(flow_cache);
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index c26c59a..ca8a582 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -52,6 +52,8 @@ struct flow_table {
unsigned int count;
};
+extern struct kmem_cache *flow_stats_cache;
+
int ovs_flow_init(void);
void ovs_flow_exit(void);
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 10/12] openvswitch: Fix output of SCTP mask.
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (8 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 09/12] openvswitch: Per NUMA node flow stats Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 11/12] openvswitch: Use TCP flags in the flow key for stats Jesse Gross
` (2 subsequent siblings)
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
From: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
The 'output' argument of the ovs_nla_put_flow() is the one from which
the bits are written to the netlink attributes. For SCTP we
accidentally used the bits from the 'swkey' instead. This caused the
mask attributes to include the bits from the actual flow key instead
of the mask.
Signed-off-by: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Acked-by: Pravin B Shelar <pshelar-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow_netlink.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 84caa99..32a725c 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1059,11 +1059,11 @@ int ovs_nla_put_flow(const struct sw_flow_key *swkey,
goto nla_put_failure;
sctp_key = nla_data(nla);
if (swkey->eth.type == htons(ETH_P_IP)) {
- sctp_key->sctp_src = swkey->ipv4.tp.src;
- sctp_key->sctp_dst = swkey->ipv4.tp.dst;
+ sctp_key->sctp_src = output->ipv4.tp.src;
+ sctp_key->sctp_dst = output->ipv4.tp.dst;
} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
- sctp_key->sctp_src = swkey->ipv6.tp.src;
- sctp_key->sctp_dst = swkey->ipv6.tp.dst;
+ sctp_key->sctp_src = output->ipv6.tp.src;
+ sctp_key->sctp_dst = output->ipv6.tp.dst;
}
} else if (swkey->eth.type == htons(ETH_P_IP) &&
swkey->ip.proto == IPPROTO_ICMP) {
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 11/12] openvswitch: Use TCP flags in the flow key for stats.
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (9 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 10/12] openvswitch: Fix output of SCTP mask Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:07 ` [PATCH net-next 12/12] net/openvswitch: Use with RCU_INIT_POINTER(x, NULL) in vport-gre.c Jesse Gross
2014-05-16 21:22 ` [GIT net-next] Open vSwitch David Miller
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller; +Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
From: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
We already extract the TCP flags for the key, might as well use that
for stats.
Signed-off-by: Jarno Rajahalme <jrajahalme-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Acked-by: Pravin B Shelar <pshelar-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/flow.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 432f04d..e0fc12b 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -69,14 +69,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, struct sk_buff *skb)
stats = rcu_dereference(flow->stats[node]);
- if ((flow->key.eth.type == htons(ETH_P_IP) ||
- flow->key.eth.type == htons(ETH_P_IPV6)) &&
- flow->key.ip.frag != OVS_FRAG_TYPE_LATER &&
- flow->key.ip.proto == IPPROTO_TCP &&
- likely(skb->len >= skb_transport_offset(skb) + sizeof(struct tcphdr))) {
- tcp_flags = TCP_FLAGS_BE16(tcp_hdr(skb));
+ if (likely(flow->key.ip.proto == IPPROTO_TCP)) {
+ if (likely(flow->key.eth.type == htons(ETH_P_IP)))
+ tcp_flags = flow->key.ipv4.tp.flags;
+ else if (likely(flow->key.eth.type == htons(ETH_P_IPV6)))
+ tcp_flags = flow->key.ipv6.tp.flags;
}
-
/* Check if already have node-specific stats. */
if (likely(stats)) {
spin_lock(&stats->lock);
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* [PATCH net-next 12/12] net/openvswitch: Use with RCU_INIT_POINTER(x, NULL) in vport-gre.c
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (10 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 11/12] openvswitch: Use TCP flags in the flow key for stats Jesse Gross
@ 2014-05-16 21:07 ` Jesse Gross
2014-05-16 21:22 ` [GIT net-next] Open vSwitch David Miller
12 siblings, 0 replies; 14+ messages in thread
From: Jesse Gross @ 2014-05-16 21:07 UTC (permalink / raw)
To: David Miller
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA,
Monam Agarwal
From: Monam Agarwal <monamagarwal123-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
This patch replaces rcu_assign_pointer(x, NULL) with RCU_INIT_POINTER(x, NULL)
The rcu_assign_pointer() ensures that the initialization of a structure
is carried out before storing a pointer to that structure.
And in the case of the NULL pointer, there is no structure to initialize.
So, rcu_assign_pointer(p, NULL) can be safely converted to RCU_INIT_POINTER(p, NULL)
Signed-off-by: Monam Agarwal <monamagarwal123-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
---
net/openvswitch/vport-gre.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 0856f01..35ec4fe 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -256,7 +256,7 @@ static void gre_tnl_destroy(struct vport *vport)
ovs_net = net_generic(net, ovs_net_id);
- rcu_assign_pointer(ovs_net->vport_net.gre_vport, NULL);
+ RCU_INIT_POINTER(ovs_net->vport_net.gre_vport, NULL);
ovs_vport_deferred_free(vport);
gre_exit();
}
--
1.9.1
^ permalink raw reply related [flat|nested] 14+ messages in thread
* Re: [GIT net-next] Open vSwitch
[not found] ` <1400274459-56304-1-git-send-email-jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
` (11 preceding siblings ...)
2014-05-16 21:07 ` [PATCH net-next 12/12] net/openvswitch: Use with RCU_INIT_POINTER(x, NULL) in vport-gre.c Jesse Gross
@ 2014-05-16 21:22 ` David Miller
12 siblings, 0 replies; 14+ messages in thread
From: David Miller @ 2014-05-16 21:22 UTC (permalink / raw)
To: jesse-l0M0P4e3n4LQT0dZR+AlfA
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
From: Jesse Gross <jesse-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
Date: Fri, 16 May 2014 14:07:27 -0700
> A set of OVS changes for net-next/3.16.
>
> The major change here is a switch from per-CPU to per-NUMA flow
> statistics. This improves scalability by reducing kernel overhead
> in flow setup and maintenance.
Pulled, thanks Jesse.
^ permalink raw reply [flat|nested] 14+ messages in thread