* [PATCH 16/25] ip_generic_getfrag, udplite_getfrag: switch to passing msghdr
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>
From: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
include/net/udplite.h | 3 ++-
net/ipv4/ip_output.c | 6 +++---
net/ipv4/raw.c | 2 +-
net/ipv4/udp.c | 4 ++--
net/ipv6/raw.c | 2 +-
net/ipv6/udp.c | 2 +-
net/l2tp/l2tp_ip6.c | 2 +-
7 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/include/net/udplite.h b/include/net/udplite.h
index 9a28a51..d5baaba 100644
--- a/include/net/udplite.h
+++ b/include/net/udplite.h
@@ -19,7 +19,8 @@ extern struct udp_table udplite_table;
static __inline__ int udplite_getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb)
{
- return memcpy_fromiovecend(to, (struct iovec *) from, offset, len);
+ struct msghdr *msg = from;
+ return memcpy_fromiovecend(to, msg->msg_iov, offset, len);
}
/* Designate sk as UDP-Lite socket */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4a929ad..cdedcf1 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -752,14 +752,14 @@ EXPORT_SYMBOL(ip_fragment);
int
ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
{
- struct iovec *iov = from;
+ struct msghdr *msg = from;
if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (memcpy_fromiovecend(to, iov, offset, len) < 0)
+ if (memcpy_fromiovecend(to, msg->msg_iov, offset, len) < 0)
return -EFAULT;
} else {
__wsum csum = 0;
- if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
+ if (csum_partial_copy_fromiovecend(to, msg->msg_iov, offset, len, &csum) < 0)
return -EFAULT;
skb->csum = csum_block_add(skb->csum, csum, odd);
}
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 5c901eb..5d83bd2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -478,7 +478,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
offset -= rfv->hlen;
- return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
+ return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
}
static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index dd8e006..13b4dcf8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1049,7 +1049,7 @@ back_from_confirm:
/* Lockless fast path for the non-corking case. */
if (!corkreq) {
- skb = ip_make_skb(sk, fl4, getfrag, msg->msg_iov, ulen,
+ skb = ip_make_skb(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
msg->msg_flags);
err = PTR_ERR(skb);
@@ -1080,7 +1080,7 @@ back_from_confirm:
do_append_data:
up->len += ulen;
- err = ip_append_data(sk, fl4, getfrag, msg->msg_iov, ulen,
+ err = ip_append_data(sk, fl4, getfrag, msg, ulen,
sizeof(struct udphdr), &ipc, &rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 942f67b..11a9283 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -727,7 +727,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
offset -= rfv->hlen;
- return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
+ return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb);
}
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 7f964322..189dc4a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1312,7 +1312,7 @@ do_append_data:
dontfrag = np->dontfrag;
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
- err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen,
+ err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 2177b96..8611f1b 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -619,7 +619,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
back_from_confirm:
lock_sock(sk);
- err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
+ err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
msg->msg_flags, dontfrag);
--
2.1.3
^ permalink raw reply related
* [PATCH 15/25] ipv6 equivalent of "ipv4: Avoid reading user iov twice after raw_probe_proto_opt"
From: Al Viro @ 2014-12-09 22:50 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>
From: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
net/ipv6/raw.c | 112 ++++++++++++++++++++++++++++-----------------------------
1 file changed, 56 insertions(+), 56 deletions(-)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8baa53e..942f67b 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -672,65 +672,62 @@ error:
return err;
}
-static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
+struct raw6_frag_vec {
+ struct msghdr *msg;
+ int hlen;
+ char c[4];
+};
+
+static int rawv6_probe_proto_opt(struct raw6_frag_vec *rfv, struct flowi6 *fl6)
{
- struct iovec *iov;
- u8 __user *type = NULL;
- u8 __user *code = NULL;
- u8 len = 0;
- int probed = 0;
- int i;
-
- if (!msg->msg_iov)
- return 0;
+ int err = 0;
+ switch (fl6->flowi6_proto) {
+ case IPPROTO_ICMPV6:
+ rfv->hlen = 2;
+ err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen);
+ if (!err) {
+ fl6->fl6_icmp_type = rfv->c[0];
+ fl6->fl6_icmp_code = rfv->c[1];
+ }
+ break;
+ case IPPROTO_MH:
+ rfv->hlen = 4;
+ err = memcpy_from_msg(rfv->c, rfv->msg, rfv->hlen);
+ if (!err)
+ fl6->fl6_mh_type = rfv->c[2];
+ }
+ return err;
+}
- for (i = 0; i < msg->msg_iovlen; i++) {
- iov = &msg->msg_iov[i];
- if (!iov)
- continue;
+static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
+ struct sk_buff *skb)
+{
+ struct raw6_frag_vec *rfv = from;
- switch (fl6->flowi6_proto) {
- case IPPROTO_ICMPV6:
- /* check if one-byte field is readable or not. */
- if (iov->iov_base && iov->iov_len < 1)
- break;
-
- if (!type) {
- type = iov->iov_base;
- /* check if code field is readable or not. */
- if (iov->iov_len > 1)
- code = type + 1;
- } else if (!code)
- code = iov->iov_base;
-
- if (type && code) {
- if (get_user(fl6->fl6_icmp_type, type) ||
- get_user(fl6->fl6_icmp_code, code))
- return -EFAULT;
- probed = 1;
- }
- break;
- case IPPROTO_MH:
- if (iov->iov_base && iov->iov_len < 1)
- break;
- /* check if type field is readable or not. */
- if (iov->iov_len > 2 - len) {
- u8 __user *p = iov->iov_base;
- if (get_user(fl6->fl6_mh_type, &p[2 - len]))
- return -EFAULT;
- probed = 1;
- } else
- len += iov->iov_len;
+ if (offset < rfv->hlen) {
+ int copy = min(rfv->hlen - offset, len);
- break;
- default:
- probed = 1;
- break;
- }
- if (probed)
- break;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ memcpy(to, rfv->c + offset, copy);
+ else
+ skb->csum = csum_block_add(
+ skb->csum,
+ csum_partial_copy_nocheck(rfv->c + offset,
+ to, copy, 0),
+ odd);
+
+ odd = 0;
+ offset += copy;
+ to += copy;
+ len -= copy;
+
+ if (!len)
+ return 0;
}
- return 0;
+
+ offset -= rfv->hlen;
+
+ return ip_generic_getfrag(rfv->msg->msg_iov, to, offset, len, odd, skb);
}
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
@@ -745,6 +742,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
+ struct raw6_frag_vec rfv;
struct flowi6 fl6;
int addr_len = msg->msg_namelen;
int hlimit = -1;
@@ -848,7 +846,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = ipv6_fixup_options(&opt_space, opt);
fl6.flowi6_proto = proto;
- err = rawv6_probe_proto_opt(&fl6, msg);
+ rfv.msg = msg;
+ rfv.hlen = 0;
+ err = rawv6_probe_proto_opt(&rfv, &fl6);
if (err)
goto out;
@@ -889,7 +889,7 @@ back_from_confirm:
err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl6, &dst, msg->msg_flags);
else {
lock_sock(sk);
- err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov,
+ err = ip6_append_data(sk, raw6_getfrag, &rfv,
len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
msg->msg_flags, dontfrag);
--
2.1.3
^ permalink raw reply related
* Re: [PATCH net] netlink: use jhash as hashfn for rhashtable
From: Daniel Borkmann @ 2014-12-09 22:56 UTC (permalink / raw)
To: David Miller; +Cc: netdev, herbert, tgraf, hannes
In-Reply-To: <20141209.143829.477482216978677919.davem@davemloft.net>
On 12/09/2014 08:38 PM, David Miller wrote:
> From: Daniel Borkmann <dborkman@redhat.com>
> Date: Mon, 8 Dec 2014 17:30:30 +0100
>
>> For netlink, we shouldn't be using arch_fast_hash() as a hashing
>> discipline, but rather jhash() instead.
>>
>> Since netlink sockets can be opened by any user, a local attacker
>> would be able to easily create collisions with the DPDK-derived
>> arch_fast_hash(), which trades off performance for security by
>> using crc32 CPU instructions on x86_64.
>>
>> While it might have a legimite use case in other places, it should
>> be avoided in netlink context, though. As rhashtable's API is very
>> flexible, we could later on still decide on other hashing disciplines,
>> if legitimate.
>>
>> Reference: http://thread.gmane.org/gmane.linux.kernel/1844123
>> Fixes: e341694e3eb5 ("netlink: Convert netlink_lookup() to use RCU protected hash table")
>> Cc: Herbert Xu <herbert@gondor.apana.org.au>
>> Cc: Thomas Graf <tgraf@suug.ch>
>> Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
>> Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
>
> I think I've seen enough of this.
>
> First of all, you've left all of the example initializers in the
> rhashtable implementation recommending to use arch_fast_hash.
>
> Secondly, after this, openvswitch (and nfsd, ugh) are the only users
> remaining. Even though there have been claims that using this
> doesn't expose to openvswitch to being hash attackable, I'm still
> not entirely convinced that an attacker cannot hurt performance
> of an OVS node as a result of this.
>
> I think this whole scheme should be reverted, whatever cycles
> openvswitch gains by using crc32c instructions is far outweighed
> by the confusion this has caused and all of this infrastructure
> created for just one or two users.
>
> Someone send me a patch to revert all of the arch_fast_hash
> stuff, and every reference thereof, or else I'll do it myself.
Hm, this netlink patch was actually meant to be small for -stable.
But fair enough, I can look into removing arch_fast_hash() bits
entirely, sure - we thought it was useful in that area as it needs
less than half the cycles (all discussed in [1]) as opposed to
jhash for computing the hash value in ovs, where such computations
are being done very frequently and the flow key structure size seems
to keep growing.
Regarding an attacker wanting to hurt performance, upcalls into ovs
user space component to dynamically install such colliding in-kernel
ovs flow-cache entries would be the much bigger concern then, as a
pre-required step, imho. Anyway, if you feel strong about it, lets
just remove it.
Thanks.
[1] http://patchwork.ozlabs.org/patch/299369/
^ permalink raw reply
* Re: [PATCH v2] sh_eth: Remove redundant alignment adjustment
From: David Miller @ 2014-12-09 23:05 UTC (permalink / raw)
To: ykaneko0929; +Cc: netdev, horms, magnus.damm, linux-sh
In-Reply-To: <1418035581-3805-1-git-send-email-ykaneko0929@gmail.com>
From: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Date: Mon, 8 Dec 2014 19:46:21 +0900
> From: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>
>
> PTR_ALIGN macro after skb_reserve is redundant, because skb_reserve
> function adjusts the alignment of skb->data.
>
> Signed-off-by: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>
> Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Applied.
^ permalink raw reply
* Re: [PATCH v3] sh_eth: Optimization for RX excess judgement
From: David Miller @ 2014-12-09 23:06 UTC (permalink / raw)
To: ykaneko0929; +Cc: netdev, horms, magnus.damm, linux-sh
In-Reply-To: <1418127822-16904-1-git-send-email-ykaneko0929@gmail.com>
From: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Date: Tue, 9 Dec 2014 21:23:42 +0900
> From: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>
>
> Both of 'boguscnt' and 'quota' have nearly meaning as the condition of
> the reception loop.
> In order to cut down redundant processing, this patch changes excess
> judgement.
>
> Signed-off-by: Mitsuhiro Kimura <mitsuhiro.kimura.kc@renesas.com>
> Signed-off-by: Yoshihiro Kaneko <ykaneko0929@gmail.com>
Applied.
^ permalink raw reply
* Re: [PATCH net] netlink: use jhash as hashfn for rhashtable
From: David Miller @ 2014-12-09 23:09 UTC (permalink / raw)
To: dborkman; +Cc: netdev, herbert, tgraf, hannes
In-Reply-To: <54877E28.7020202@redhat.com>
From: Daniel Borkmann <dborkman@redhat.com>
Date: Tue, 09 Dec 2014 23:56:40 +0100
> we thought it was useful in that area as it needs less than half the
> cycles (all discussed in [1]) as opposed to jhash for computing the
> hash value in ovs, where such computations are being done very
> frequently and the flow key structure size seems to keep growing.
If that is the case, you could instead come up with a portable C hash
function that doesn't require jumping through hoops to use special CPU
instructions yet still has the better performance you're looking for.
Heck, if attackability isn't an issue, plain ole 'xor' over the u32's
of the key is sufficient and in fact is what our TCP socket hash used
to be before jhash.
> Anyway, if you feel strong about it, lets just remove it.
Please do.
^ permalink raw reply
* Re: the next chunk of iov_iter-net stuff for review
From: Al Viro @ 2014-12-09 23:13 UTC (permalink / raw)
To: David Miller; +Cc: netdev, linux-kernel
In-Reply-To: <20141209224928.GL22149@ZenIV.linux.org.uk>
On Tue, Dec 09, 2014 at 10:49:28PM +0000, Al Viro wrote:
> OK, rebase is in
> git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git for-davem-2,
> individual patches - in followups (first 13 - iov_iter, then 12 net-related)
PS: the pile next after that one is also there, in
git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs.git iov_iter-net-2
but that one contains several things I'd *really* like to hear comments
about (asked about all of them on netdev, no reply so far). Let's sort this
one out first...
Al, diving back into VFS-related queues...
^ permalink raw reply
* Re: [PATCH] fix suspicious rcu_dereference_check in net/sched/sch_fq_codel.c
From: John Fastabend @ 2014-12-09 23:13 UTC (permalink / raw)
To: Eric Dumazet, Valdis Kletnieks
Cc: Eric Dumazet, David S. Miller, linux-kernel, netdev
In-Reply-To: <1418161372.27198.11.camel@edumazet-glaptop2.roam.corp.google.com>
On 12/09/2014 01:42 PM, Eric Dumazet wrote:
> On Tue, 2014-12-09 at 16:15 -0500, Valdis Kletnieks wrote:
>> commit 46e5da40ae (net: qdisc: use rcu prefix and silence
>> sparse warnings) triggers a spurious warning:
>>
>> net/sched/sch_fq_codel.c:97 suspicious rcu_dereference_check() usage!
>>
>> The code should be using the _bh variant of rcu_dereference.
>>
>> Signed-off-by: Valdis Kletnieks <valdis.kletnieks@vt.edu>
>> ---
>> net/sched/sch_fq_codel.c | 2 +-
>> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> Acked-by: Eric Dumazet <edumazet@google.com>
>
> Thanks !
>
>
Great thanks for finding/fixing!
Acked-by: John Fastabend <john.r.fastabend@intel.com>
^ permalink raw reply
* Re: pull request: wireless-next 2014-12-08
From: David Miller @ 2014-12-09 23:22 UTC (permalink / raw)
To: linville-2XuSBdqkA4R54TAoqtyWWQ
Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20141208193223.GA14086-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
From: "John W. Linville" <linville-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Date: Mon, 8 Dec 2014 14:32:23 -0500
> Please pull this last batch of pending wireless updates for the 3.19
> tree...
Wow that's a lot :)
Pulled, thanks John.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH net-next v5 0/3] remove bridge mode BRIDGE_MODE_SWDEV
From: David Miller @ 2014-12-09 23:25 UTC (permalink / raw)
To: roopa
Cc: jiri, sfeldma, jhs, bcrl, tgraf, john.fastabend, stephen,
linville, vyasevic, netdev, shm, gospo
In-Reply-To: <1418076261-24593-1-git-send-email-roopa@cumulusnetworks.com>
From: roopa@cumulusnetworks.com
Date: Mon, 8 Dec 2014 14:04:18 -0800
> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>
> BRIDGE_MODE_SWDEV was introduced to indicate switchdev offloads
> for bridging from user space (In other words to call into the hw switch
> port driver directly). But user can use existing BRIDGE_FLAGS_SELF
> to call into the hw switch port driver today. swdev mode is not required
> anymore. So, this patch removes it.
>
> v4 - v5
> incorporate comments
> - Define BRIDGE_MODE_UNDEF to handle cases where mode is not defined
> - reverse the order of patches
> - include patch comments in all patches
>
> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Series applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next] net: systemport: allow changing MAC address
From: David Miller @ 2014-12-09 23:26 UTC (permalink / raw)
To: f.fainelli; +Cc: netdev
In-Reply-To: <1418083158-4027-1-git-send-email-f.fainelli@gmail.com>
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 8 Dec 2014 15:59:18 -0800
> Hook a ndo_set_mac_address callback, update the internal Ethernet MAC in
> the netdevice structure, and finally write that address down to the
> UniMAC registers. If the interface is down, and most likely clock gated,
> we do not update the registers but just the local copy, such that next
> ndo_open() call will effectively write down the address.
>
> Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next] tipc: avoid double lock 'spin_lock:&seq->lock'
From: David Miller @ 2014-12-09 23:27 UTC (permalink / raw)
To: ying.xue; +Cc: jon.maloy, netdev, tipc-discussion, dan.carpenter
In-Reply-To: <1418109476-2006-1-git-send-email-ying.xue@windriver.com>
From: Ying Xue <ying.xue@windriver.com>
Date: Tue, 9 Dec 2014 15:17:56 +0800
> The commit fb9962f3cefe ("tipc: ensure all name sequences are properly
> protected with its lock") involves below errors:
>
> net/tipc/name_table.c:980 tipc_purge_publications() error: double lock 'spin_lock:&seq->lock'
>
> Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
> Signed-off-by: Ying Xue <ying.xue@windriver.com>
Applied.
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
^ permalink raw reply
* Re: [PATCH] stmmac: platform: adjust messages and move to dev level
From: David Miller @ 2014-12-09 23:28 UTC (permalink / raw)
To: andriy.shevchenko; +Cc: peppe.cavallaro, netdev
In-Reply-To: <1418119153-13355-1-git-send-email-andriy.shevchenko@linux.intel.com>
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 9 Dec 2014 11:59:13 +0200
> This patch amendes error and warning messages across the platform driver. It
> includes the following changes:
> - remove unneccessary message when no memory is available
> - change info level to warn in the validation functions
> - append \n to the end of messages
> - change pr_* macros to dev_*
>
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
This does not apply to the net-next tree, please respin.
^ permalink raw reply
* Re: [PATCH net-next 1/1] net: fec: avoid kernal crash by NULL pointer when no phy connection
From: David Miller @ 2014-12-09 23:29 UTC (permalink / raw)
To: b38611; +Cc: netdev, s.hauer, bhutchings, stephen
In-Reply-To: <1418122016-32084-1-git-send-email-b38611@freescale.com>
From: Fugang Duan <b38611@freescale.com>
Date: Tue, 9 Dec 2014 18:46:56 +0800
> On i.MX6SX sabreauto board, when there have no phy daughter board connection,
> there have kernel crash by NULL pointer:
...
> Add phydev check to fix the issue.
>
> Signed-off-by: Fugang Duan <B38611@freescale.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH] netback: don't store invalid vif pointer
From: David Miller @ 2014-12-09 23:30 UTC (permalink / raw)
To: JBeulich; +Cc: wei.liu2, Ian.Campbell, xen-devel, netdev
In-Reply-To: <5486EF48020000780004E1B8@mail.emea.novell.com>
From: "Jan Beulich" <JBeulich@suse.com>
Date: Tue, 09 Dec 2014 11:47:04 +0000
> When xenvif_alloc() fails, it returns a non-NULL error indicator. To
> avoid eventual races, we shouldn't store that into struct backend_info
> as readers of it only check for NULL.
>
> Signed-off-by: Jan Beulich <jbeulich@suse.com>
Applied, thanks.
^ permalink raw reply
* Re: [PATCH net-next 0/9] r8169:update hardware ephy parameter
From: David Miller @ 2014-12-09 23:35 UTC (permalink / raw)
To: hau; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1418143563-7652-1-git-send-email-hau@realtek.com>
From: Chunhao Lin <hau@realtek.com>
Date: Wed, 10 Dec 2014 00:45:54 +0800
> Update hardware ephy parameter to improve pcie compatibility.
This really doesn't tell me anything, I really dislike patch
series like this one.
All of the programming is magic values to magic offsets.
You aren't even trying to describe in the commit log message exactly
what kind of settings are being changed, and exactly how those changes
achieve the stated goal.
Furthermore, the commit description makes no sense at all to me.
How can programming the ethernet MAC PHY have any influence on PCI-E
bus compatability? Or are you programming the PCI bus interface's
PHY?
In what way are you adjusting which settings and in what way do those
adjustments help improve PCI-E bus behavior?
You absolutely must describe exactly what the new programming
is actually doing, precisely, and in detail. I want to know
if some kind of timings are being adjusted, and in what way.
Are some fifo limits being changes? If so, in what way, and
why does that help.
You have to describe what you are doing. Short and non-informative
commit log messages alongside random changes to undocumented magic
constant registers is simply unacceptable.
^ permalink raw reply
* Re: [PATCH net-next 0/9] r8169:update hardware ephy parameter
From: Francois Romieu @ 2014-12-09 23:47 UTC (permalink / raw)
To: Chunhao Lin; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1418143563-7652-1-git-send-email-hau@realtek.com>
Chunhao Lin <hau@realtek.com> :
> Update hardware ephy parameter to improve pcie compatibility.
Neither the code nor the commit message helps to figure which
kind of behavioral change should/could be expected.
Bandwidth ? Latency ? Power management ? Compliance ? Stability ?
--
Ueimor
^ permalink raw reply
* Re: [PATCH net-next 5/9] r8169:update rtl8168fb ephy parameter
From: Francois Romieu @ 2014-12-09 23:48 UTC (permalink / raw)
To: Chunhao Lin; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1418143563-7652-6-git-send-email-hau@realtek.com>
Chunhao Lin <hau@realtek.com> :
[rtl_hw_start_8168f_1 and rtl_hw_start_8168f_2 changes]
Different ephy_info data, same code. You may consider factoring it out.
--
Ueimor
^ permalink raw reply
* Re: [PATCH net-next 7/9] r8169:update rtl8168dp ephy parameter
From: Francois Romieu @ 2014-12-09 23:50 UTC (permalink / raw)
To: Chunhao Lin; +Cc: netdev, nic_swsd, linux-kernel
In-Reply-To: <1418143563-7652-8-git-send-email-hau@realtek.com>
Chunhao Lin <hau@realtek.com> :
[...]
> diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
> index a979519..42eda35 100644
> --- a/drivers/net/ethernet/realtek/r8169.c
> +++ b/drivers/net/ethernet/realtek/r8169.c
[...]
> static void rtl_hw_start_8168d_4(struct rtl8169_private *tp)
> {
> void __iomem *ioaddr = tp->mmio_addr;
> struct pci_dev *pdev = tp->pci_dev;
> static const struct ephy_info e_info_8168d_4[] = {
> - { 0x0b, ~0, 0x48 },
> - { 0x19, 0x20, 0x50 },
> - { 0x0c, ~0, 0x20 }
> + { 0x0b, 0x0000, 0x0048 },
> + { 0x19, 0x0020, 0x0050 },
> + { 0x0c, 0x0100, 0x0020 },
> + { 0x10, 0x0004, 0x0000 }
> };
> - int i;
>
> rtl_csi_access_enable_1(tp);
>
> - rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
> + if (tp->dev->mtu <= ETH_DATA_LEN)
> + rtl_tx_performance_tweak(pdev, 0x5 << MAX_READ_REQUEST_SHIFT);
[...]
> @@ -6328,11 +6308,8 @@ static void rtl_hw_start_8168(struct net_device *dev)
> break;
>
> case RTL_GIGA_MAC_VER_28:
> - rtl_hw_start_8168d_4(tp);
> - break;
> -
> case RTL_GIGA_MAC_VER_31:
> - rtl_hw_start_8168dp(tp);
> + rtl_hw_start_8168d_4(tp);
RTL_GIGA_MAC_VER_28 would thus use a mtu dependant rtl_tx_performance_tweak
in its hw_start handler but would not include one in its jumbo_ops helpers.
It does not seem completely right.
--
Ueimor
^ permalink raw reply
* [PATCH] net: openvswitch: Support masked set actions.
From: Jarno Rajahalme @ 2014-12-10 0:10 UTC (permalink / raw)
To: netdev-u79uwXL29TY76Z2rM5mHXA, dev-yBygre7rU0TnMu66kgdUjQ
OVS userspace already probes the openvswitch kernel module for
OVS_ACTION_ATTR_SET_MASKED support. This patch adds the kernel module
implementation of masked set actions.
The existing set action sets many fields at once. When only a subset
of the IP header fields, for example, should be modified, all the IP
fields need to be exact matched so that the other field values can be
copied to the set action. A masked set action allows modification of
an arbitrary subset of the supported header bits without requiring the
rest to be matched.
Masked set action is now supported for all writeable key types, except
for the tunnel key. The set tunnel action is an exception as any
input tunnel info is cleared before action processing starts, so there
is no tunnel info to mask.
The kernel module converts all (non-tunnel) set actions to masked set
actions. This makes action processing more uniform, and results in
less branching and duplicating the action processing code. When
returning actions to userspace, the fully masked set actions are
converted back to normal set actions. We use a kernel internal action
code to be able to tell the userspace provided and converted masked
set actions apart.
Signed-off-by: Jarno Rajahalme <jrajahalme@nicira.com>
---
include/uapi/linux/openvswitch.h | 22 ++-
net/openvswitch/actions.c | 367 ++++++++++++++++++++++++--------------
net/openvswitch/flow_netlink.c | 168 ++++++++++++++---
3 files changed, 396 insertions(+), 161 deletions(-)
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 3a6dcaa..f4bd703 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -564,6 +564,12 @@ struct ovs_action_hash {
* @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The
* single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
* value.
+ * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A
+ * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value,
+ * and a mask. For every bit set in the mask, the corresponding bit value
+ * is copied from the value to the packet header field, rest of the bits are
+ * left unchanged. The non-masked value bits must be passed in as zeroes.
+ * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute.
* @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
* packet.
* @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
@@ -582,6 +588,9 @@ struct ovs_action_hash {
* Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all
* fields within a header are modifiable, e.g. the IPv4 protocol and fragment
* type may not be changed.
+ *
+ * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated
+ * from the @OVS_ACTION_ATTR_SET.
*/
enum ovs_action_attr {
@@ -596,8 +605,19 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */
OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */
OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */
+ OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including
+ * data immediately followed by a mask.
+ * The data must be zero for the unmasked
+ * bits. */
+
+ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted
+ * from userspace. */
- __OVS_ACTION_ATTR_MAX
+#ifdef __KERNEL__
+ OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked
+ * set action converted from
+ * OVS_ACTION_ATTR_SET. */
+#endif
};
#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 764fdc3..ee57b9b 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -184,10 +184,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return 0;
}
-static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
- const __be32 *mpls_lse)
+/* 'KEY' must not have any bits set outside of the 'MASK' */
+#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK)))
+#define SET_MASKED(OLD, KEY, MASK) (OLD) = MASKED(OLD, KEY, MASK)
+
+static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const __be32 *mpls_lse, const __be32 *mask)
{
__be32 *stack;
+ __be32 lse;
int err;
err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN);
@@ -195,14 +200,15 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key,
return err;
stack = (__be32 *)skb_mpls_header(skb);
+ lse = MASKED(*stack, *mpls_lse, *mask);
if (skb->ip_summed == CHECKSUM_COMPLETE) {
- __be32 diff[] = { ~(*stack), *mpls_lse };
+ __be32 diff[] = { ~(*stack), lse };
skb->csum = ~csum_partial((char *)diff, sizeof(diff),
~skb->csum);
}
- *stack = *mpls_lse;
- key->mpls.top_lse = *mpls_lse;
+ *stack = lse;
+ flow_key->mpls.top_lse = lse;
return 0;
}
@@ -229,8 +235,21 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key,
ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
}
-static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ethernet *eth_key)
+/* 'src' is already properly masked. */
+static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_)
+{
+ u16 *dst = (u16 *)dst_;
+ const u16 *src = (const u16 *)src_;
+ const u16 *mask = (const u16 *)mask_;
+
+ SET_MASKED(dst[0], src[0], mask[0]);
+ SET_MASKED(dst[1], src[1], mask[1]);
+ SET_MASKED(dst[2], src[2], mask[2]);
+}
+
+static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ethernet *key,
+ const struct ovs_key_ethernet *mask)
{
int err;
err = skb_ensure_writable(skb, ETH_HLEN);
@@ -239,13 +258,15 @@ static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key,
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src);
- ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst);
+ ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src,
+ mask->eth_src);
+ ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst,
+ mask->eth_dst);
ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2);
- ether_addr_copy(key->eth.src, eth_key->eth_src);
- ether_addr_copy(key->eth.dst, eth_key->eth_dst);
+ ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source);
+ ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest);
return 0;
}
@@ -303,6 +324,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto,
}
}
+static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4],
+ const __be32 mask[4], __be32 masked[4])
+{
+ masked[0] = MASKED(old[0], addr[0], mask[0]);
+ masked[1] = MASKED(old[1], addr[1], mask[1]);
+ masked[2] = MASKED(old[2], addr[2], mask[2]);
+ masked[3] = MASKED(old[3], addr[3], mask[3]);
+}
+
static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
__be32 addr[4], const __be32 new_addr[4],
bool recalculate_csum)
@@ -314,29 +344,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
}
-static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc)
+static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
{
- nh->priority = tc >> 4;
- nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4);
+ /* Bits 21-24 are always unmasked, so this retains their values. */
+ SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
+ SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
+ SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
}
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl)
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
+ u8 mask)
{
- nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16;
- nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8;
- nh->flow_lbl[2] = fl & 0x000000FF;
-}
+ new_ttl = MASKED(nh->ttl, new_ttl, mask);
-static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
-{
csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
nh->ttl = new_ttl;
}
-static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ipv4 *ipv4_key)
+static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ipv4 *key,
+ const struct ovs_key_ipv4 *mask)
{
struct iphdr *nh;
+ __be32 new_addr;
int err;
err = skb_ensure_writable(skb, skb_network_offset(skb) +
@@ -346,36 +376,48 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key,
nh = ip_hdr(skb);
- if (ipv4_key->ipv4_src != nh->saddr) {
- set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
- key->ipv4.addr.src = ipv4_key->ipv4_src;
- }
+ /* Setting an IP addresses is typically only a side effect of
+ * matching on them in the current userspace implementation, so it
+ * makes sense to check if the value actually changed. */
+ if (mask->ipv4_src) {
+ new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src);
- if (ipv4_key->ipv4_dst != nh->daddr) {
- set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
- key->ipv4.addr.dst = ipv4_key->ipv4_dst;
+ if (unlikely(new_addr != nh->saddr)) {
+ set_ip_addr(skb, nh, &nh->saddr, new_addr);
+ flow_key->ipv4.addr.src = new_addr;
+ }
}
+ if (mask->ipv4_dst) {
+ new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst);
- if (ipv4_key->ipv4_tos != nh->tos) {
- ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
- key->ip.tos = nh->tos;
+ if (unlikely(new_addr != nh->daddr)) {
+ set_ip_addr(skb, nh, &nh->daddr, new_addr);
+ flow_key->ipv4.addr.dst = new_addr;
+ }
}
-
- if (ipv4_key->ipv4_ttl != nh->ttl) {
- set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
- key->ip.ttl = ipv4_key->ipv4_ttl;
+ if (mask->ipv4_tos) {
+ ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos);
+ flow_key->ip.tos = nh->tos;
+ }
+ if (mask->ipv4_ttl) {
+ set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl);
+ flow_key->ip.ttl = nh->ttl;
}
return 0;
}
-static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_ipv6 *ipv6_key)
+static bool is_ipv6_mask_nonzero(const __be32 addr[4])
+{
+ return !!(addr[0] | addr[1] | addr[2] | addr[3]);
+}
+
+static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_ipv6 *key,
+ const struct ovs_key_ipv6 *mask)
{
struct ipv6hdr *nh;
int err;
- __be32 *saddr;
- __be32 *daddr;
err = skb_ensure_writable(skb, skb_network_offset(skb) +
sizeof(struct ipv6hdr));
@@ -383,40 +425,59 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key,
return err;
nh = ipv6_hdr(skb);
- saddr = (__be32 *)&nh->saddr;
- daddr = (__be32 *)&nh->daddr;
-
- if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) {
- set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr,
- ipv6_key->ipv6_src, true);
- memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src,
- sizeof(ipv6_key->ipv6_src));
- }
- if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) {
+ /* Setting an IP addresses is typically only a side effect of
+ * matching on them in the current userspace implementation, so it
+ * makes sense to check if the value actually changed. */
+ if (is_ipv6_mask_nonzero(mask->ipv6_src)) {
+ __be32 *saddr = (__be32 *)&nh->saddr;
+ __be32 masked[4];
+
+ mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked);
+
+ if (unlikely(memcmp(saddr, masked, sizeof masked))) {
+ set_ipv6_addr(skb, key->ipv6_proto, saddr, masked,
+ true);
+ memcpy(&flow_key->ipv6.addr.src, masked,
+ sizeof(flow_key->ipv6.addr.src));
+ }
+ }
+ if (is_ipv6_mask_nonzero(mask->ipv6_dst)) {
unsigned int offset = 0;
int flags = IP6_FH_F_SKIP_RH;
bool recalc_csum = true;
-
- if (ipv6_ext_hdr(nh->nexthdr))
- recalc_csum = ipv6_find_hdr(skb, &offset,
- NEXTHDR_ROUTING, NULL,
- &flags) != NEXTHDR_ROUTING;
-
- set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr,
- ipv6_key->ipv6_dst, recalc_csum);
- memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst,
- sizeof(ipv6_key->ipv6_dst));
+ __be32 *daddr = (__be32 *)&nh->daddr;
+ __be32 masked[4];
+
+ mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked);
+
+ if (unlikely(memcmp(daddr, masked, sizeof masked))) {
+ if (ipv6_ext_hdr(nh->nexthdr))
+ recalc_csum = (ipv6_find_hdr(skb, &offset,
+ NEXTHDR_ROUTING,
+ NULL, &flags)
+ != NEXTHDR_ROUTING);
+
+ set_ipv6_addr(skb, key->ipv6_proto, daddr, masked,
+ recalc_csum);
+ memcpy(&flow_key->ipv6.addr.dst, masked,
+ sizeof(flow_key->ipv6.addr.dst));
+ }
+ }
+ if (mask->ipv6_tclass) {
+ ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+ flow_key->ip.tos = ipv6_get_dsfield(nh);
+ }
+ if (mask->ipv6_label) {
+ set_ipv6_fl(nh, ntohl(key->ipv6_label),
+ ntohl(mask->ipv6_label));
+ flow_key->ipv6.label =
+ *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+ }
+ if (mask->ipv6_hlimit) {
+ SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit);
+ flow_key->ip.ttl = nh->hop_limit;
}
-
- set_ipv6_tc(nh, ipv6_key->ipv6_tclass);
- key->ip.tos = ipv6_get_dsfield(nh);
-
- set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label));
- key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
-
- nh->hop_limit = ipv6_key->ipv6_hlimit;
- key->ip.ttl = ipv6_key->ipv6_hlimit;
return 0;
}
@@ -426,28 +487,14 @@ static void set_tp_port(struct sk_buff *skb, __be16 *port,
{
inet_proto_csum_replace2(check, skb, *port, new_port, 0);
*port = new_port;
- skb_clear_hash(skb);
-}
-
-static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port)
-{
- struct udphdr *uh = udp_hdr(skb);
-
- if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
- set_tp_port(skb, port, new_port, &uh->check);
-
- if (!uh->check)
- uh->check = CSUM_MANGLED_0;
- } else {
- *port = new_port;
- skb_clear_hash(skb);
- }
}
-static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_udp *udp_port_key)
+static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_udp *key,
+ const struct ovs_key_udp *mask)
{
struct udphdr *uh;
+ __be16 src, dst;
int err;
err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -456,23 +503,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key,
return err;
uh = udp_hdr(skb);
- if (udp_port_key->udp_src != uh->source) {
- set_udp_port(skb, &uh->source, udp_port_key->udp_src);
- key->tp.src = udp_port_key->udp_src;
- }
+ /* Either of the masks is non-zero, so do not bother checking them. */
+ src = MASKED(uh->source, key->udp_src, mask->udp_src);
+ dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst);
- if (udp_port_key->udp_dst != uh->dest) {
- set_udp_port(skb, &uh->dest, udp_port_key->udp_dst);
- key->tp.dst = udp_port_key->udp_dst;
+ if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (likely(src != uh->source)) {
+ set_tp_port(skb, &uh->source, src, &uh->check);
+ flow_key->tp.src = src;
+ }
+ if (likely(dst != uh->dest)) {
+ set_tp_port(skb, &uh->dest, dst, &uh->check);
+ flow_key->tp.dst = dst;
+ }
+
+ if (unlikely(!uh->check))
+ uh->check = CSUM_MANGLED_0;
+ } else {
+ uh->source = src;
+ uh->dest = dst;
+ flow_key->tp.src = src;
+ flow_key->tp.dst = dst;
}
+ skb_clear_hash(skb);
+
return 0;
}
-static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_tcp *tcp_port_key)
+static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_tcp *key,
+ const struct ovs_key_tcp *mask)
{
struct tcphdr *th;
+ __be16 src, dst;
int err;
err = skb_ensure_writable(skb, skb_transport_offset(skb) +
@@ -481,50 +545,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key,
return err;
th = tcp_hdr(skb);
- if (tcp_port_key->tcp_src != th->source) {
- set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
- key->tp.src = tcp_port_key->tcp_src;
+ src = MASKED(th->source, key->tcp_src, mask->tcp_src);
+ if (likely(src != th->source)) {
+ set_tp_port(skb, &th->source, src, &th->check);
+ flow_key->tp.src = src;
}
-
- if (tcp_port_key->tcp_dst != th->dest) {
- set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
- key->tp.dst = tcp_port_key->tcp_dst;
+ dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst);
+ if (likely(dst != th->dest)) {
+ set_tp_port(skb, &th->dest, dst, &th->check);
+ flow_key->tp.dst = dst;
}
+ skb_clear_hash(skb);
return 0;
}
-static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key,
- const struct ovs_key_sctp *sctp_port_key)
+static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key,
+ const struct ovs_key_sctp *key,
+ const struct ovs_key_sctp *mask)
{
+ unsigned int sctphoff = skb_transport_offset(skb);
struct sctphdr *sh;
+ __le32 old_correct_csum, new_csum, old_csum;
int err;
- unsigned int sctphoff = skb_transport_offset(skb);
err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr));
if (unlikely(err))
return err;
sh = sctp_hdr(skb);
- if (sctp_port_key->sctp_src != sh->source ||
- sctp_port_key->sctp_dst != sh->dest) {
- __le32 old_correct_csum, new_csum, old_csum;
-
- old_csum = sh->checksum;
- old_correct_csum = sctp_compute_cksum(skb, sctphoff);
+ old_csum = sh->checksum;
+ old_correct_csum = sctp_compute_cksum(skb, sctphoff);
- sh->source = sctp_port_key->sctp_src;
- sh->dest = sctp_port_key->sctp_dst;
+ sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src);
+ sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst);
- new_csum = sctp_compute_cksum(skb, sctphoff);
+ new_csum = sctp_compute_cksum(skb, sctphoff);
- /* Carry any checksum errors through. */
- sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
+ /* Carry any checksum errors through. */
+ sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
- skb_clear_hash(skb);
- key->tp.src = sctp_port_key->sctp_src;
- key->tp.dst = sctp_port_key->sctp_dst;
- }
+ skb_clear_hash(skb);
+ flow_key->tp.src = sh->source;
+ flow_key->tp.dst = sh->dest;
return 0;
}
@@ -652,52 +715,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
key->ovs_flow_hash = hash;
}
-static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key,
- const struct nlattr *nested_attr)
+static int execute_set_action(struct sk_buff *skb,
+ struct sw_flow_key *flow_key,
+ const struct nlattr *a)
+{
+ /* Only tunnel set execution is supported without a mask. */
+ if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) {
+ OVS_CB(skb)->egress_tun_info = nla_data(a);
+ return 0;
+ }
+
+ return -EINVAL;
+
+}
+
+/* Mask is at the midpoint of the data. */
+#define get_mask(a, type) ((const type *)nla_data(a) + 1)
+
+static int execute_masked_set_action(struct sk_buff *skb,
+ struct sw_flow_key *flow_key,
+ const struct nlattr *a)
{
int err = 0;
- switch (nla_type(nested_attr)) {
+ switch (nla_type(a)) {
case OVS_KEY_ATTR_PRIORITY:
- skb->priority = nla_get_u32(nested_attr);
- key->phy.priority = skb->priority;
+ SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32));
+ flow_key->phy.priority = skb->priority;
break;
case OVS_KEY_ATTR_SKB_MARK:
- skb->mark = nla_get_u32(nested_attr);
- key->phy.skb_mark = skb->mark;
+ SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32));
+ flow_key->phy.skb_mark = skb->mark;
break;
case OVS_KEY_ATTR_TUNNEL_INFO:
- OVS_CB(skb)->egress_tun_info = nla_data(nested_attr);
+ /* Masked data not supported for tunnel. */
+ err = -EINVAL;
break;
case OVS_KEY_ATTR_ETHERNET:
- err = set_eth_addr(skb, key, nla_data(nested_attr));
+ err = set_eth_addr(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ethernet));
break;
case OVS_KEY_ATTR_IPV4:
- err = set_ipv4(skb, key, nla_data(nested_attr));
+ err = set_ipv4(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ipv4));
break;
case OVS_KEY_ATTR_IPV6:
- err = set_ipv6(skb, key, nla_data(nested_attr));
+ err = set_ipv6(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_ipv6));
break;
case OVS_KEY_ATTR_TCP:
- err = set_tcp(skb, key, nla_data(nested_attr));
+ err = set_tcp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_tcp));
break;
case OVS_KEY_ATTR_UDP:
- err = set_udp(skb, key, nla_data(nested_attr));
+ err = set_udp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_udp));
break;
case OVS_KEY_ATTR_SCTP:
- err = set_sctp(skb, key, nla_data(nested_attr));
+ err = set_sctp(skb, flow_key, nla_data(a),
+ get_mask(a, struct ovs_key_sctp));
break;
case OVS_KEY_ATTR_MPLS:
- err = set_mpls(skb, key, nla_data(nested_attr));
+ err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, __be32));
break;
}
@@ -817,6 +905,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
err = execute_set_action(skb, key, nla_data(a));
break;
+ case OVS_ACTION_ATTR_SET_MASKED:
+ case OVS_ACTION_ATTR_SET_TO_MASKED:
+ err = execute_masked_set_action(skb, key, nla_data(a));
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample(dp, skb, key, a);
break;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index df3c7f2..276bb60 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -1609,23 +1609,44 @@ static int validate_and_copy_set_tun(const struct nlattr *attr,
return err;
}
+/* Return false if there are any non-masked bits set.
+ * Mask follows data immediately, before any netlink padding. */
+static bool validate_masked(u8 *data, int len)
+{
+ u8 *mask = data + len;
+
+ while (len--)
+ if (*data++ & ~*mask++)
+ return false;
+
+ return true;
+}
+
static int validate_set(const struct nlattr *a,
const struct sw_flow_key *flow_key,
struct sw_flow_actions **sfa,
- bool *set_tun, __be16 eth_type, bool log)
+ bool *skip_copy, __be16 eth_type, bool masked, bool log)
{
const struct nlattr *ovs_key = nla_data(a);
int key_type = nla_type(ovs_key);
+ size_t key_len;
/* There can be only one key in a action */
if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
return -EINVAL;
+ key_len = nla_len(ovs_key);
+ if (masked)
+ key_len /= 2;
+
if (key_type > OVS_KEY_ATTR_MAX ||
- (ovs_key_lens[key_type] != nla_len(ovs_key) &&
+ (ovs_key_lens[key_type] != key_len &&
ovs_key_lens[key_type] != -1))
return -EINVAL;
+ if (masked && !validate_masked(nla_data(ovs_key), key_len))
+ return -EINVAL;
+
switch (key_type) {
const struct ovs_key_ipv4 *ipv4_key;
const struct ovs_key_ipv6 *ipv6_key;
@@ -1640,7 +1661,10 @@ static int validate_set(const struct nlattr *a,
if (eth_p_mpls(eth_type))
return -EINVAL;
- *set_tun = true;
+ if (masked)
+ return -EINVAL; /* Masked tunnel set not supported. */
+
+ *skip_copy = true;
err = validate_and_copy_set_tun(a, sfa, log);
if (err)
return err;
@@ -1650,32 +1674,46 @@ static int validate_set(const struct nlattr *a,
if (eth_type != htons(ETH_P_IP))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv4_key = nla_data(ovs_key);
- if (ipv4_key->ipv4_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv4_key->ipv4_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv4 *mask = ipv4_key + 1;
+ /* Non-writeable fields. */
+ if (mask->ipv4_proto || mask->ipv4_frag)
+ return -EINVAL;
+ } else {
+ if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
break;
case OVS_KEY_ATTR_IPV6:
if (eth_type != htons(ETH_P_IPV6))
return -EINVAL;
- if (!flow_key->ip.proto)
- return -EINVAL;
-
ipv6_key = nla_data(ovs_key);
- if (ipv6_key->ipv6_proto != flow_key->ip.proto)
- return -EINVAL;
- if (ipv6_key->ipv6_frag != flow_key->ip.frag)
- return -EINVAL;
+ if (masked) {
+ const struct ovs_key_ipv6 *mask = ipv6_key + 1;
+ /* Non-writeable fields. */
+ if (mask->ipv6_proto || mask->ipv6_frag)
+ return -EINVAL;
+
+ /* Invalid bits in the flow label mask? */
+ if (ntohl(mask->ipv6_label) & 0xFFF00000)
+ return -EINVAL;
+ } else {
+ if (ipv6_key->ipv6_proto != flow_key->ip.proto)
+ return -EINVAL;
+
+ if (ipv6_key->ipv6_frag != flow_key->ip.frag)
+ return -EINVAL;
+ }
if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
return -EINVAL;
@@ -1685,13 +1723,19 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_TCP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ err = validate_tp_port(flow_key, eth_type);
+ if (err)
+ return err;
+ break;
case OVS_KEY_ATTR_UDP:
if (flow_key->ip.proto != IPPROTO_UDP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ err = validate_tp_port(flow_key, eth_type);
+ if (err)
+ return err;
+ break;
case OVS_KEY_ATTR_MPLS:
if (!eth_p_mpls(eth_type))
@@ -1702,12 +1746,43 @@ static int validate_set(const struct nlattr *a,
if (flow_key->ip.proto != IPPROTO_SCTP)
return -EINVAL;
- return validate_tp_port(flow_key, eth_type);
+ err = validate_tp_port(flow_key, eth_type);
+ if (err)
+ return err;
+ break;
default:
return -EINVAL;
}
+ /* Convert non-masked non-tunnel set actions to masked set actions. */
+ if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
+ int start, len = key_len * 2;
+ struct nlattr *at;
+
+ *skip_copy = true;
+
+ start = add_nested_action_start(sfa,
+ OVS_ACTION_ATTR_SET_TO_MASKED,
+ log);
+ if (start < 0)
+ return start;
+
+ at = __add_action(sfa, key_type, NULL, len, log);
+ if (IS_ERR(at))
+ return PTR_ERR(at);
+
+ memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
+ memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */
+ /* Clear non-writeable bits from otherwise writeable fields. */
+ if (key_type == OVS_KEY_ATTR_IPV6) {
+ struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
+
+ mask->ipv6_label &= htonl(0x000FFFFF);
+ }
+ add_nested_action_end(*sfa, start);
+ }
+
return 0;
}
@@ -1770,6 +1845,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
[OVS_ACTION_ATTR_POP_VLAN] = 0,
[OVS_ACTION_ATTR_SET] = (u32)-1,
+ [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
[OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
[OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash)
};
@@ -1872,12 +1948,20 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
break;
case OVS_ACTION_ATTR_SET:
- err = validate_set(a, key, sfa,
- &out_tnl_port, eth_type, log);
+ err = validate_set(a, key, sfa, &skip_copy, eth_type,
+ false, log);
if (err)
return err;
- skip_copy = out_tnl_port;
+ if (nla_type(nla_data(a)) == OVS_KEY_ATTR_TUNNEL)
+ out_tnl_port = true;
+ break;
+
+ case OVS_ACTION_ATTR_SET_MASKED:
+ err = validate_set(a, key, sfa, &skip_copy, eth_type,
+ true, log);
+ if (err)
+ return err;
break;
case OVS_ACTION_ATTR_SAMPLE:
@@ -1905,6 +1989,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr,
return 0;
}
+/* 'key' must be the masked key. */
int ovs_nla_copy_actions(const struct nlattr *attr,
const struct sw_flow_key *key,
struct sw_flow_actions **sfa, bool log)
@@ -1992,6 +2077,31 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
return 0;
}
+static int masked_set_action_to_attr(const struct nlattr *a,
+ struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+
+ if (nla_put(skb, OVS_ACTION_ATTR_SET_MASKED, nla_len(a), ovs_key))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int masked_set_action_to_set_action_attr(const struct nlattr *a,
+ struct sk_buff *skb)
+{
+ const struct nlattr *ovs_key = nla_data(a);
+ size_t key_len = nla_len(ovs_key) / 2;
+
+ /* Revert the conversion we did from a non-masked set action to
+ * masked set action. */
+ if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
{
const struct nlattr *a;
@@ -2007,6 +2117,18 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
return err;
break;
+ case OVS_ACTION_ATTR_SET_MASKED:
+ err = masked_set_action_to_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
+ case OVS_ACTION_ATTR_SET_TO_MASKED:
+ err = masked_set_action_to_set_action_attr(a, skb);
+ if (err)
+ return err;
+ break;
+
case OVS_ACTION_ATTR_SAMPLE:
err = sample_action_to_attr(a, skb);
if (err)
--
1.7.10.4
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
^ permalink raw reply related
* Re: [PATCHv11 net-next 2/2] openvswitch: Add support for unique flow IDs.
From: Joe Stringer @ 2014-12-10 0:25 UTC (permalink / raw)
To: Pravin Shelar; +Cc: dev-yBygre7rU0TnMu66kgdUjQ@public.gmane.org, netdev, LKML
In-Reply-To: <CALnjE+o4tbQVTyVkzdOG8zj=qxPZ8Dv3vGHbYkBmghxsLgKkkw-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
On 9 December 2014 at 10:32, Pravin Shelar <pshelar@nicira.com> wrote:
> On Tue, Dec 2, 2014 at 6:56 PM, Joe Stringer <joestringer@nicira.com> wrote:
>> @@ -662,11 +664,18 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
>> }
>> }
>>
>> -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts)
>> +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
>> + const struct sw_flow_id *sfid)
>> {
>> + size_t sfid_len = 0;
>> +
>> + if (sfid && sfid->ufid_len)
>> + sfid_len = nla_total_size(sfid->ufid_len);
>> +
> Can you also use ufid_flags to determine exact msg size?
Yeah, that should be straightforward.
>> @@ -849,13 +876,30 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>> }
>>
>> /* Extract key. */
>> - ovs_match_init(&match, &new_flow->unmasked_key, &mask);
>> + ovs_match_init(&match, &key, &mask);
>> error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
>> a[OVS_FLOW_ATTR_MASK], log);
>> if (error)
>> goto err_kfree_flow;
>>
>> - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
>> + ovs_flow_mask_key(&new_flow->key, &key, &mask);
>> +
>> + /* Extract flow id. */
>> + error = ovs_nla_copy_ufid(a[OVS_FLOW_ATTR_UFID], &new_flow->ufid, log);
>> + if (error)
>> + goto err_kfree_flow;
>
> Returning zero in case of no UFID is strange. Can you check for UFID
> attribute and then copy ufid unconditionally?
Right, along with the changes I'll make to integrating unmasked_key
and UFID this should collapse into a single call to
ovs_nla_copy_identifier() which will handle both cases.
>> @@ -877,8 +921,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
>> error = -ENODEV;
>> goto err_unlock_ovs;
>> }
>> +
>> /* Check if this is a duplicate flow */
>> - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key);
>> + if (new_flow->ufid)
>> + flow = ovs_flow_tbl_lookup_ufid(&dp->table, new_flow->ufid);
>> + if (!flow)
>> + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
>
> Need tight checking, for example what if flow with UFID does not exist
> in UFID table but it exist in flow-table? This can complicate
> flow-update case where we can not assume UFID of new and old flow is
> same.
OK, I overlooked this for the UFID case. The non-UFID case does an
exact lookup later in the function, we could add a check in the UFID
case to compare the masked keys and return an error if they are
unequal.
>> @@ -980,45 +1032,34 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
>> struct nlattr **a = info->attrs;
>> struct ovs_header *ovs_header = info->userhdr;
>> struct sw_flow_key key;
>> - struct sw_flow *flow;
>> + struct sw_flow *flow = NULL;
>> struct sw_flow_mask mask;
>> struct sk_buff *reply = NULL;
>> struct datapath *dp;
>> struct sw_flow_actions *old_acts = NULL, *acts = NULL;
>> struct sw_flow_match match;
>> + struct sw_flow_id *ufid;
>> + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
>> int error;
>> bool log = !a[OVS_FLOW_ATTR_PROBE];
>>
>> - /* Extract key. */
>> - error = -EINVAL;
>> - if (!a[OVS_FLOW_ATTR_KEY]) {
>> + /* Extract identifier. Take a copy to avoid "Wframe-larger-than=1024"
>> + * warning.
>> + */
> What if we limit the implementation of UFID to max 128 bit, does it
> still gives you the warning?
It will if we still use struct sw_flow_id, when we combine
UFID+unmasked key. Possibly not with just a 128bit array. I'll look
into it.
>> + error = ovs_nla_copy_ufid(a[OVS_FLOW_ATTR_UFID], &ufid, log);
>> + if (error)
>> + return error;
>> + if (a[OVS_FLOW_ATTR_KEY]) {
>> + ovs_match_init(&match, &key, &mask);
>> + error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
>> + a[OVS_FLOW_ATTR_MASK], log);
>> + } else if (!ufid) {
>> OVS_NLERR(log, "Flow key attribute not present in set flow.");
>> - goto error;
>> + error = -EINVAL;
>> }
>> -
>> - ovs_match_init(&match, &key, &mask);
>> - error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY],
>> - a[OVS_FLOW_ATTR_MASK], log);
>> if (error)
>> goto error;
>>
>> - /* Validate actions. */
>> - if (a[OVS_FLOW_ATTR_ACTIONS]) {
>> - acts = get_flow_actions(a[OVS_FLOW_ATTR_ACTIONS], &key, &mask,
>> - log);
>> - if (IS_ERR(acts)) {
>> - error = PTR_ERR(acts);
>> - goto error;
>> - }
>> -
>> - /* Can allocate before locking if have acts. */
>> - reply = ovs_flow_cmd_alloc_info(acts, info, false);
>> - if (IS_ERR(reply)) {
>> - error = PTR_ERR(reply);
>> - goto err_kfree_acts;
>> - }
>> - }
>> -
> Why are you moving this action validation under ovs-lock?
The thought was that flow_cmd_set may receive UFID and not key/mask.
One could imagine a command that sends a UFID and actions, telling OVS
kmod to update just the actions. Masked key is required for
ovs_nla_copy_actions(), so in this case a lookup would be required to
get a masked key.
Perhaps the better alternative for the moment is to still require flow
key and mask for this command (just as we do for flow_cmd_new). That
would simplify this change greatly, and doesn't affect current OVS
userspace.
>> @@ -1194,9 +1254,18 @@ unlock:
>>
>> static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
>> {
>> + struct nlattr *a[__OVS_FLOW_ATTR_MAX];
>> struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
>> struct table_instance *ti;
>> struct datapath *dp;
>> + u32 ufid_flags;
>> + int err;
>> +
>> + err = nlmsg_parse(cb->nlh, GENL_HDRLEN + dp_flow_genl_family.hdrsize,
>> + a, dp_flow_genl_family.maxattr, flow_policy);
>
> Can you add genl helper function for this?
OK.
>> @@ -1235,6 +1304,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
>> [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
>> [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
>> [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
>> + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC },
>> + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
>> };
>>
>> static const struct genl_ops dp_flow_genl_ops[] = {
>> diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
>> index a8b30f3..7f31dbf 100644
>> --- a/net/openvswitch/flow.h
>> +++ b/net/openvswitch/flow.h
>> @@ -197,6 +197,13 @@ struct sw_flow_match {
>> struct sw_flow_mask *mask;
>> };
>>
>> +#define MAX_UFID_LENGTH 256
>> +
> For now we can limit this to 128 bits.
Is there a reason beyond trying to avoid the warning in flow_cmd_set()?
I suppose that its purpose as an identifier means that it's unlikely to
need to be much bigger in future (indeed, the larger it gets, the more
it would trade off the performance gains from using it).
>> @@ -213,13 +220,16 @@ struct flow_stats {
>>
>> struct sw_flow {
>> struct rcu_head rcu;
>> - struct hlist_node hash_node[2];
>> - u32 hash;
>> + struct {
>> + struct hlist_node node[2];
>> + u32 hash;
>> + } flow_hash, ufid_hash;
> I am not sure about _hash suffix here, Can you explain it? I think
> _table is better.
Agreed, table is better.
>> int stats_last_writer; /* NUMA-node id of the last writer on
>> * 'stats[0]'.
>> */
>> struct sw_flow_key key;
>> - struct sw_flow_key unmasked_key;
>> + struct sw_flow_id *ufid;
> Lets move this structure inside sw_flow, so that we can avoid one
> kmalloc during flow-install in case of UFID. something like:
>
> struct {
> u32 ufid_len;
> union {
> u32 ufid[MAX_UFID_LENGTH / 4];
> struct sw_flow_key *unmasked_key;
> }
> } id;
Agreed.
>> @@ -424,10 +475,9 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
>> ovs_flow_mask_key(&masked_key, unmasked, mask);
>> hash = flow_hash(&masked_key, key_start, key_end);
>> head = find_bucket(ti, hash);
>> - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
>> - if (flow->mask == mask && flow->hash == hash &&
>> - flow_cmp_masked_key(flow, &masked_key,
>> - key_start, key_end))
>> + hlist_for_each_entry_rcu(flow, head, flow_hash.node[ti->node_ver]) {
>> + if (flow->mask == mask && flow->flow_hash.hash == hash &&
>> + flow_cmp_masked_key(flow, &masked_key, key_start, key_end))
>> return flow;
>> }
>> return NULL;
>> @@ -469,7 +519,40 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl,
>> /* Always called under ovs-mutex. */
>> list_for_each_entry(mask, &tbl->mask_list, list) {
>> flow = masked_flow_lookup(ti, match->key, mask);
>> - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */
>> + if (flow && !flow->ufid &&
> why not NULL check for flow->unmasked_key here rather than ufid?
In this version, I tried to consistently use flow->ufid as the switch
for whether UFID exists or not. In the next version, this statement
would refer to flow->id->ufid_len.
The current approach means that ovs_flow_tbl_lookup_exact() is really
ovs_flow_tbl_lookup_unmasked_key(). Do you think this should remain
specific to unmasked key or should it be made to check that the
identifier (ufid OR unmasked key) is the same?
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
^ permalink raw reply
* Re: [PATCH iproute2 v2] ip link: Show devices by type
From: Stephen Hemminger @ 2014-12-10 0:31 UTC (permalink / raw)
To: Vadim Kochan; +Cc: netdev
In-Reply-To: <1417618598-23605-1-git-send-email-vadim4j@gmail.com>
On Wed, 3 Dec 2014 16:56:38 +0200
Vadim Kochan <vadim4j@gmail.com> wrote:
> Added new option 'type' to 'ip link show'
> command which allows to filter devices by type:
>
> ip link show type bridge
> ip link show type vlan
>
> Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
Does not apply to current iproute2 source.
Please update and resubmit
^ permalink raw reply
* Re: [patch iproute2 2/6] bridge/fdb: fix statistics output spacing
From: Stephen Hemminger @ 2014-12-10 0:32 UTC (permalink / raw)
To: Jiri Pirko
Cc: netdev, davem, nhorman, andy, tgraf, dborkman, ogerlitz, jesse,
pshelar, azhou, ben, jeffrey.t.kirsher, vyasevic, xiyou.wangcong,
john.r.fastabend, edumazet, jhs, sfeldma, f.fainelli, roopa,
linville, jasowang, ebiederm, nicolas.dichtel, ryazanov.s.a,
buytenh, aviadr, nbd, alexei.starovoitov, Neil.Jerram, ronye,
simon.horman, alexander.h.duyck, john.ronciak, mleitner, shrijeet,
gospo, bcrl, hemal
In-Reply-To: <1417683438-10935-3-git-send-email-jiri@resnulli.us>
On Thu, 4 Dec 2014 09:57:14 +0100
Jiri Pirko <jiri@resnulli.us> wrote:
> From: Scott Feldman <sfeldma@gmail.com>
>
> Signed-off-by: Scott Feldman <sfeldma@gmail.com>
> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Applied
^ permalink raw reply
* [net-next] dummy: use MODULE_VERSION
From: Flavio Leitner @ 2014-12-10 0:41 UTC (permalink / raw)
To: netdev; +Cc: Flavio Leitner
Use MODULE_VERSION() now that dummy driver has a version.
Signed-off-by: Flavio Leitner <fbl@redhat.com>
---
drivers/net/dummy.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 413ca4f..49adbf1 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -225,3 +225,4 @@ module_init(dummy_init_module);
module_exit(dummy_cleanup_module);
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);
+MODULE_VERSION(DRV_VERSION);
--
1.9.3
^ permalink raw reply related
* [REGRESSION] iproute: change to if_bridge.h breaks build
From: Stephen Hemminger @ 2014-12-10 0:43 UTC (permalink / raw)
To: David Miller, Gregory Fong; +Cc: netdev
This commit causes build problems in iproute.
I can fix it for iproute, by reordering the headers
but on the principle that updates should never break
builds of existing code, this is wrong.
gcc -Wall -Wstrict-prototypes -Wmissing-prototypes -Wmissing-declarations -Wold-style-definition -Wformat=2 -O2 -I../include -DRESOLVE_HOSTNAMES -DLIBDIR=\"/usr/lib\" -DCONFDIR=\"/etc/iproute2\" -D_GNU_SOURCE -DHAVE_SETNS -c -o iplink_bridge_slave.o iplink_bridge_slave.c
In file included from ../include/linux/if_bridge.h:18:0,
from iplink_bridge_slave.c:16:
../include/linux/in6.h:169:0: warning: "IPV6_ADD_MEMBERSHIP" redefined
#define IPV6_ADD_MEMBERSHIP 20
^
In file included from /usr/include/netinet/in.h:37:0,
from iplink_bridge_slave.c:14:
commit 66f1c44887ba4f47d617f8ae21cf8e04e1892bd7
Author: Gregory Fong <gregory.0xf0@gmail.com>
Date: Tue Nov 4 11:21:21 2014 -0800
bridge: include in6.h in if_bridge.h for struct in6_addr
if_bridge.h uses struct in6_addr ip6, but wasn't including the in6.h
header. Thomas Backlund originally sent a patch to do this, but this
revealed a redefinition issue: https://lkml.org/lkml/2013/1/13/116
The redefinition issue should have been fixed by the following Linux
commits:
ee262ad827f89e2dc7851ec2986953b5b125c6bc inet: defines IPPROTO_* needed for module alias generation
cfd280c91253cc28e4919e349fa7a813b63e71e8 net: sync some IP headers with glibc
and the following glibc commit:
6c82a2f8d7c8e21e39237225c819f182ae438db3 Coordinate IPv6 definitions for Linux and glibc
so actually include the header now.
Reported-by: Colin Guthrie <colin@mageia.org>
Reported-by: Christiaan Welvaart <cjw@daneel.dyndns.org>
Reported-by: Thomas Backlund <tmb@mageia.org>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Cc: David Miller <davem@davemloft.net>
Signed-off-by: Gregory Fong <gregory.0xf0@gmail.com>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox