* [PATCH net-next 2/4] bonding: use RCU protection for alb xmit path
From: Ding Tianhong @ 2013-09-29 11:45 UTC (permalink / raw)
To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
Nikolay Aleksandrov, Veaceslav Falico, Netdev
The commit 278b20837511776dc9d5f6ee1c7fabd5479838bb
(bonding: initial RCU conversion) has convert the roundrobin,
active-backup, broadcast and xor xmit path to rcu protection,
the performance will be better for these mode, so this time,
convert xmit path for alb mode.
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Cc: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_alb.c | 23 +++++++++--------------
drivers/net/bonding/bonding.h | 2 +-
2 files changed, 10 insertions(+), 15 deletions(-)
diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index e960418..a1444d5 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -230,7 +230,7 @@ static struct slave *tlb_get_least_loaded_slave(struct bonding *bond)
max_gap = LLONG_MIN;
/* Find the slave with the largest gap */
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
if (SLAVE_IS_OK(slave)) {
long long gap = compute_gap(slave);
@@ -387,7 +387,7 @@ static struct slave *rlb_next_rx_slave(struct bonding *bond)
struct list_head *iter;
bool found = false;
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
if (!SLAVE_IS_OK(slave))
continue;
if (!found) {
@@ -628,12 +628,14 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
{
struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
struct arp_pkt *arp = arp_pkt(skb);
- struct slave *assigned_slave;
+ struct slave *assigned_slave, *curr_active_slave;
struct rlb_client_info *client_info;
u32 hash_index = 0;
_lock_rx_hashtbl(bond);
+ curr_active_slave = rcu_dereference(bond->curr_active_slave);
+
hash_index = _simple_hash((u8 *)&arp->ip_dst, sizeof(arp->ip_dst));
client_info = &(bond_info->rx_hashtbl[hash_index]);
@@ -658,8 +660,8 @@ static struct slave *rlb_choose_channel(struct sk_buff *skb, struct bonding *bon
* that the new client can be assigned to this entry.
*/
if (bond->curr_active_slave &&
- client_info->slave != bond->curr_active_slave) {
- client_info->slave = bond->curr_active_slave;
+ client_info->slave != curr_active_slave) {
+ client_info->slave = curr_active_slave;
rlb_update_client(client_info);
}
}
@@ -1343,11 +1345,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
skb_reset_mac_header(skb);
eth_data = eth_hdr(skb);
- /* make sure that the curr_active_slave do not change during tx
- */
- read_lock(&bond->lock);
- read_lock(&bond->curr_slave_lock);
-
switch (ntohs(skb->protocol)) {
case ETH_P_IP: {
const struct iphdr *iph = ip_hdr(skb);
@@ -1429,12 +1426,12 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
if (!tx_slave) {
/* unbalanced or unassigned, send through primary */
- tx_slave = bond->curr_active_slave;
+ tx_slave = rcu_dereference(bond->curr_active_slave);
bond_info->unbalanced_load += skb->len;
}
if (tx_slave && SLAVE_IS_OK(tx_slave)) {
- if (tx_slave != bond->curr_active_slave) {
+ if (tx_slave != rcu_dereference(bond->curr_active_slave)) {
memcpy(eth_data->h_source,
tx_slave->dev->dev_addr,
ETH_ALEN);
@@ -1449,8 +1446,6 @@ int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
}
}
- read_unlock(&bond->curr_slave_lock);
- read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 713b6af..1c7d559 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -459,7 +459,7 @@ static inline struct slave *bond_slave_has_mac(struct bonding *bond,
struct list_head *iter;
struct slave *tmp;
- bond_for_each_slave(bond, tmp, iter)
+ bond_for_each_slave_rcu(bond, tmp, iter)
if (ether_addr_equal_64bits(mac, tmp->dev->dev_addr))
return tmp;
--
1.8.2.1
^ permalink raw reply related
* [PATCH net-next 1/4] bonding: use RCU protection for 3ad xmit path
From: Ding Tianhong @ 2013-09-29 11:44 UTC (permalink / raw)
To: Jay Vosburgh, Andy Gospodarek, David S. Miller,
Nikolay Aleksandrov, Veaceslav Falico, Netdev
The commit 278b20837511776dc9d5f6ee1c7fabd5479838bb
(bonding: initial RCU conversion) has convert the roundrobin,
active-backup, broadcast and xor xmit path to rcu protection,
the performance will be better for these mode, so this time,
convert xmit path for 3ad mode.
Suggested-by: Nikolay Aleksandrov <nikolay@redhat.com>
Suggested-by: Veaceslav Falico <vfalico@redhat.com>
Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Cc: Nikolay Aleksandrov <nikolay@redhat.com>
Cc: Veaceslav Falico <vfalico@redhat.com>
---
drivers/net/bonding/bond_3ad.c | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c
index c62606a..971a7dc 100644
--- a/drivers/net/bonding/bond_3ad.c
+++ b/drivers/net/bonding/bond_3ad.c
@@ -2344,7 +2344,7 @@ int __bond_3ad_get_active_agg_info(struct bonding *bond,
struct slave *slave;
struct port *port;
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
port = &(SLAVE_AD_INFO(slave).port);
if (port->aggregator && port->aggregator->is_active) {
aggregator = port->aggregator;
@@ -2388,7 +2388,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
int res = 1;
int agg_id;
- read_lock(&bond->lock);
if (__bond_3ad_get_active_agg_info(bond, &ad_info)) {
pr_debug("%s: Error: __bond_3ad_get_active_agg_info failed\n",
dev->name);
@@ -2406,7 +2405,7 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
slave_agg_no = bond->xmit_hash_policy(skb, slaves_in_agg);
first_ok_slave = NULL;
- bond_for_each_slave(bond, slave, iter) {
+ bond_for_each_slave_rcu(bond, slave, iter) {
agg = SLAVE_AD_INFO(slave).port.aggregator;
if (!agg || agg->aggregator_identifier != agg_id)
continue;
@@ -2436,7 +2435,6 @@ int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev)
res = bond_dev_queue_xmit(bond, skb, first_ok_slave->dev);
out:
- read_unlock(&bond->lock);
if (res) {
/* no suitable interface, frame not sent */
kfree_skb(skb);
--
1.8.2.1
^ permalink raw reply related
* [PATCH]: iproute action: typo nat fix
From: Jamal Hadi Salim @ 2013-09-29 11:39 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev@vger.kernel.org, herbert
[-- Attachment #1: Type: text/plain, Size: 26 bytes --]
attached.
cheers,
jamal
[-- Attachment #2: nat-1 --]
[-- Type: text/plain, Size: 779 bytes --]
commit 012b8c4b3af629039d2bb47ade6e0d3e6980a72b
Author: Jamal Hadi Salim <hadi@mojatatu.com>
Date: Sun Sep 29 07:35:21 2013 -0400
If you taketh you giveth.
I Went the LinuxWay and copied this for m_simple.c and noticed
this one typo (I wonder where it came from?;->).
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
diff --git a/tc/m_nat.c b/tc/m_nat.c
index 01ec032..d502a81 100644
--- a/tc/m_nat.c
+++ b/tc/m_nat.c
@@ -146,7 +146,7 @@ parse_nat(struct action_util *a, int *argc_p, char ***argv_p, int tca_id, struct
if (matches(*argv, "index") == 0) {
NEXT_ARG();
if (get_u32(&sel.index, *argv, 10)) {
- fprintf(stderr, "Pedit: Illegal \"index\"\n");
+ fprintf(stderr, "Nat: Illegal \"index\"\n");
return -1;
}
argc--;
^ permalink raw reply related
* [PATCH]: iproute action: Introduce simple action
From: Jamal Hadi Salim @ 2013-09-29 11:33 UTC (permalink / raw)
To: Stephen Hemminger; +Cc: netdev@vger.kernel.org
[-- Attachment #1: Type: text/plain, Size: 286 bytes --]
I had to explain this to someone after they said it wasnt in
iproute2.
Note: For some reason not all kernel headers are copied. You need
at least have linux/tc_act/tc_defact.h for this to compile.
I could send you a patch - but i think this is something you
should do.
cheers,
jamal
[-- Attachment #2: simp-p1 --]
[-- Type: text/plain, Size: 6290 bytes --]
commit ce8f6c550ff714203ca565699e9f332a93704e41
Author: Jamal Hadi Salim <hadi@mojatatu.com>
Date: Sun Sep 29 07:19:23 2013 -0400
Simple action is already in the kernel for years now as an
example. This complements it with user space control.
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
diff --git a/tc/Makefile b/tc/Makefile
index 1eeabd8..f54a955 100644
--- a/tc/Makefile
+++ b/tc/Makefile
@@ -38,6 +38,7 @@ TCMODULES += m_nat.o
TCMODULES += m_pedit.o
TCMODULES += m_skbedit.o
TCMODULES += m_csum.o
+TCMODULES += m_simple.o
TCMODULES += p_ip.o
TCMODULES += p_icmp.o
TCMODULES += p_tcp.o
diff --git a/tc/m_simple.c b/tc/m_simple.c
new file mode 100644
index 0000000..0224440
--- /dev/null
+++ b/tc/m_simple.c
@@ -0,0 +1,202 @@
+/*
+ * m_simple.c simple action
+ *
+ * This program is free software; you can distribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: J Hadi Salim <jhs@mojatatu.com>
+ *
+ * Pedagogical example. Adds a string that will be printed everytime
+ * the simple instance is hit.
+ * Use this as a skeleton action and keep modifying it to meet your needs.
+ * Look at linux/tc_act/tc_defact.h for the different components ids and
+ * definitions used in this actions
+ *
+ * example use, yell "Incoming ICMP!" every time you see an incoming ICMP on
+ * eth0. Steps are:
+ * 1) Add an ingress qdisc point to eth0
+ * 2) Start a chain on ingress of eth0 that first matches ICMP then invokes
+ * the simple action to shout.
+ * 3) display stats and show that no packet has been seen by the action
+ * 4) Send one ping packet to google (expect to receive a response back)
+ * 5) grep the logs to see the logged message
+ * 6) display stats again and observe increment by 1
+ *
+ hadi@noma1:$ tc qdisc add dev eth0 ingress
+ hadi@noma1:$tc filter add dev eth0 parent ffff: protocol ip prio 5 \
+ u32 match ip protocol 1 0xff flowid 1:1 action simple "Incoming ICMP"
+
+ hadi@noma1:$ sudo tc -s filter ls dev eth0 parent ffff:
+ filter protocol ip pref 5 u32
+ filter protocol ip pref 5 u32 fh 800: ht divisor 1
+ filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
+ match 00010000/00ff0000 at 8
+ action order 1: Simple <Incoming ICMP>
+ index 4 ref 1 bind 1 installed 29 sec used 29 sec
+ Action statistics:
+ Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+
+
+ hadi@noma1$ ping -c 1 www.google.ca
+ PING www.google.ca (74.125.225.120) 56(84) bytes of data.
+ 64 bytes from ord08s08-in-f24.1e100.net (74.125.225.120): icmp_req=1 ttl=53 time=31.3 ms
+
+ --- www.google.ca ping statistics ---
+ 1 packets transmitted, 1 received, 0% packet loss, time 0ms
+ rtt min/avg/max/mdev = 31.316/31.316/31.316/0.000 ms
+
+ hadi@noma1$ dmesg | grep simple
+ [135354.473951] simple: Incoming ICMP_1
+
+ hadi@noma1$ sudo tc/tc -s filter ls dev eth0 parent ffff:
+ filter protocol ip pref 5 u32
+ filter protocol ip pref 5 u32 fh 800: ht divisor 1
+ filter protocol ip pref 5 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:1
+ match 00010000/00ff0000 at 8
+ action order 1: Simple <Incoming ICMP>
+ index 4 ref 1 bind 1 installed 206 sec used 67 sec
+ Action statistics:
+ Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
+ backlog 0b 0p requeues 0
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+#include "utils.h"
+#include "tc_util.h"
+#include <linux/tc_act/tc_defact.h>
+
+#ifndef SIMP_MAX_DATA
+#define SIMP_MAX_DATA 32
+#endif
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... simple STRING\n"
+ "STRING being an arbitrary string\n"
+ "example: \"simple blah\"\n");
+}
+
+static void usage(void)
+{
+ explain();
+ exit(-1);
+}
+
+static int
+parse_simple(struct action_util *a, int *argc_p, char ***argv_p, int tca_id,
+ struct nlmsghdr *n)
+{
+ struct tc_defact sel = {};
+ int argc = *argc_p;
+ char **argv = *argv_p;
+ int ok = 0;
+ struct rtattr *tail;
+ char *simpdata = NULL;
+
+
+ while (argc > 0) {
+ if (matches(*argv, "simple") == 0) {
+ NEXT_ARG();
+ simpdata = *argv;
+ ok = 1;
+ argc--;
+ argv++;
+ break;
+ } else if (matches(*argv, "help") == 0) {
+ usage();
+ } else {
+ break;
+ }
+
+ }
+
+ if (!ok) {
+ explain();
+ return -1;
+ }
+
+ if (argc) {
+ if (matches(*argv, "index") == 0) {
+ NEXT_ARG();
+ if (get_u32(&sel.index, *argv, 10)) {
+ fprintf(stderr, "simple: Illegal \"index\"\n");
+ return -1;
+ }
+ argc--;
+ argv++;
+ }
+ }
+
+ if (strlen(simpdata) > (SIMP_MAX_DATA - 1)) {
+ fprintf(stderr, "simple: Illegal string len %ld <%s> \n",
+ strlen(simpdata), simpdata);
+ return -1;
+ }
+
+ sel.action = TC_ACT_PIPE;
+
+ tail = NLMSG_TAIL(n);
+ addattr_l(n, MAX_MSG, tca_id, NULL, 0);
+ addattr_l(n, MAX_MSG, TCA_DEF_PARMS, &sel, sizeof(sel));
+ addattr_l(n, MAX_MSG, TCA_DEF_DATA, simpdata, SIMP_MAX_DATA);
+ tail->rta_len = (char *)NLMSG_TAIL(n) - (char *)tail;
+
+ *argc_p = argc;
+ *argv_p = argv;
+ return 0;
+}
+
+static int print_simple(struct action_util *au, FILE * f, struct rtattr *arg)
+{
+ struct tc_defact *sel;
+ struct rtattr *tb[TCA_DEF_MAX + 1];
+ char *simpdata;
+
+ if (arg == NULL)
+ return -1;
+
+ parse_rtattr_nested(tb, TCA_DEF_MAX, arg);
+
+ if (tb[TCA_DEF_PARMS] == NULL) {
+ fprintf(f, "[NULL simple parameters]");
+ return -1;
+ }
+ sel = RTA_DATA(tb[TCA_DEF_PARMS]);
+
+ if (tb[TCA_DEF_DATA] == NULL) {
+ fprintf(f, "[missing simple string]");
+ return -1;
+ }
+
+ simpdata = RTA_DATA(tb[TCA_DEF_DATA]);
+
+ fprintf(f, "Simple <%s>\n", simpdata);
+ fprintf(f, "\t index %d ref %d bind %d", sel->index,
+ sel->refcnt, sel->bindcnt);
+
+ if (show_stats) {
+ if (tb[TCA_DEF_TM]) {
+ struct tcf_t *tm = RTA_DATA(tb[TCA_DEF_TM]);
+ print_tm(f, tm);
+ fprintf(f, "\n");
+ }
+ }
+
+ return 0;
+}
+
+struct action_util simple_action_util = {
+ .id = "simple",
+ .parse_aopt = parse_simple,
+ .print_aopt = print_simple,
+};
^ permalink raw reply related
* Re: [PATCH net-next] qdisc: basic classifier - remove unnecessary initialization
From: Jamal Hadi Salim @ 2013-09-29 11:28 UTC (permalink / raw)
To: Stephen Hemminger, David Miller; +Cc: netdev
In-Reply-To: <20130926174216.447555bd@nehalam.linuxnetplumber.net>
On 13-09-26 08:42 PM, Stephen Hemminger wrote:
> err is set once, then first code resets it.
> err = tcf_exts_validate(...)
>
> Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
>
>
> --- a/net/sched/cls_basic.c 2013-08-10 10:36:11.657498301 -0700
> +++ b/net/sched/cls_basic.c 2013-09-05 18:05:14.718200833 -0700
> @@ -137,7 +137,7 @@ static int basic_set_parms(struct net *n
> struct nlattr **tb,
> struct nlattr *est)
> {
> - int err = -EINVAL;
> + int err;
> struct tcf_exts e;
> struct tcf_ematch_tree t;
>
Acked-by: Jamal Hadi Salim <hadi@mojatatu.com>
cheers,
jamal
^ permalink raw reply
* Re: [PATCH] IPv6: Allow the MTU of ipip6 tunnel to be set below 1280
From: Oussama Ghorbel @ 2013-09-29 9:40 UTC (permalink / raw)
To: Oussama Ghorbel, David S. Miller, Alexey Kuznetsov, James Morris,
Hideaki YOSHIFUJI, Patrick McHardy, netdev, linux-kernel
Cc: ou.ghorbel
In-Reply-To: <20130927170345.GC10343@order.stressinduktion.org>
On Fri, Sep 27, 2013 at 6:03 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> On Fri, Sep 27, 2013 at 05:36:45PM +0100, Oussama Ghorbel wrote:
>> Please see my comments below
>>
>> Regards,
>> Oussama
>>
>> On Fri, Sep 27, 2013 at 11:58 AM, Hannes Frederic Sowa
>> <hannes@stressinduktion.org> wrote:
>> > On Fri, Sep 27, 2013 at 11:45:48AM +0100, Oussama Ghorbel wrote:
>> >> The ip6_tunnel.c module would be then dependent on ip_tunnel.c and may
>> >> be it would not be good thing?
>> >
>> > It could just be a static inline in some shared header. So there would
>> > be no compile-time dependency.
>> >
>>
>> The higher limit of mtu in ip_tunnel_change_mtu() is a calculated value.
>> This high limit is calculated using the netdev priv struct ip_tunnel
>> (field hlen).
>> This netdev priv struct is different in ipv6, it's a ip6_tnl struct.
>> Therefore implementing a beautiful function like
>> ip_tunnel_valid_mtu(struct net_device *dev, int mtu) won't be
>> feasible, unless we implement it in macro or something like like
>> ip_tunnel_valid_mtu(struct net_device *dev, int hlen, int mtu) which
>> seems not very nice ...
>> What do yo think?
>
> Ok, let's go with one function per protocol type. Seems easier.
>
> It seems to get more hairy, because it depends on the tunnel driver if the
> prepended ip header is accounted in hard_header_len. :/
>
> I don't know if it works out cleanly. Otherwise I would be ok if the checks
> just get repeated in ip6_tunnel and leave the rest as-is.
>
Yes, It will be the clean way to do it.
>>
>> >> As I have check in v3.10 there is no call from ip6_tunnel to ip_tunnel...
>> >>
>> >> For information, there is no check for the maximum MTU for ipv4 in the
>> >> patch as this is not done for ipv6.
>> >
>> > I understand, but it would be better to limit the MTU here. There is a
>> > (non-jumo) IPV6_MAXPLEN constant.
>> >
>> > Looking through the source it seems grev6 does actually check this,
>> > so it would not hurt adding them here, too.
>>
>> what if jumbograms is used, in that case, we can't use IPV6_MAXPLEN.
>> the limit would be the the full unsigned int.
>> However checking the higher limit for ipv4 would be useful.
>
> Linux currently cannot create "jumbograms" (only the receiving side
> is supported).
>
I understand, but what are the benefit from this limit or the harm
from not specifying it?
Please check this comment from eth.c
/**
* eth_change_mtu - set new MTU size
* @dev: network device
* @new_mtu: new Maximum Transfer Unit
*
* Allow changing MTU size. Needs to be overridden for devices
* supporting jumbo frames.
*/
int eth_change_mtu(struct net_device *dev, int new_mtu)
So wouldn't be a good idea to let our function open for jumbo frames...?
>> Please also note in case the tunnel mode is any (ipv4 or ipv6 means
>> ip6_tnl.parms.proto = 0), then we will be required to take the most
>> restrict limit from both ipv4 and ipv6 which means the lower limit
>> will be 1280 and the higher limit will be about 65535
>> Do you agree on this?
>
> Agreed, this would be needed for e.g. the sit driver, which now can
> handle both protocols.
>
> Thanks,
>
> Hannes
>
Thanks,
Oussama
^ permalink raw reply
* Re: [PATCH V3 4/6] vhost_net: determine whether or not to use zerocopy at one time
From: Jason Wang @ 2013-09-29 9:36 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, kvm, virtualization
In-Reply-To: <5243B859.3070302@redhat.com>
On 09/26/2013 12:30 PM, Jason Wang wrote:
> On 09/23/2013 03:16 PM, Michael S. Tsirkin wrote:
>> > On Thu, Sep 05, 2013 at 10:54:44AM +0800, Jason Wang wrote:
>>>> >> > On 09/04/2013 07:59 PM, Michael S. Tsirkin wrote:
>>>>>> >>> > > On Mon, Sep 02, 2013 at 04:40:59PM +0800, Jason Wang wrote:
>>>>>>>> >>>> > >> Currently, even if the packet length is smaller than VHOST_GOODCOPY_LEN, if
>>>>>>>> >>>> > >> upend_idx != done_idx we still set zcopy_used to true and rollback this choice
>>>>>>>> >>>> > >> later. This could be avoided by determining zerocopy once by checking all
>>>>>>>> >>>> > >> conditions at one time before.
>>>>>>>> >>>> > >>
>>>>>>>> >>>> > >> Signed-off-by: Jason Wang <jasowang@redhat.com>
>>>>>>>> >>>> > >> ---
>>>>>>>> >>>> > >> drivers/vhost/net.c | 47 ++++++++++++++++++++---------------------------
>>>>>>>> >>>> > >> 1 files changed, 20 insertions(+), 27 deletions(-)
>>>>>>>> >>>> > >>
>>>>>>>> >>>> > >> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
>>>>>>>> >>>> > >> index 8a6dd0d..3f89dea 100644
>>>>>>>> >>>> > >> --- a/drivers/vhost/net.c
>>>>>>>> >>>> > >> +++ b/drivers/vhost/net.c
>>>>>>>> >>>> > >> @@ -404,43 +404,36 @@ static void handle_tx(struct vhost_net *net)
>>>>>>>> >>>> > >> iov_length(nvq->hdr, s), hdr_size);
>>>>>>>> >>>> > >> break;
>>>>>>>> >>>> > >> }
>>>>>>>> >>>> > >> - zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
>>>>>>>> >>>> > >> - nvq->upend_idx != nvq->done_idx);
>>>>>>>> >>>> > >> +
>>>>>>>> >>>> > >> + zcopy_used = zcopy && len >= VHOST_GOODCOPY_LEN
>>>>>>>> >>>> > >> + && (nvq->upend_idx + 1) % UIO_MAXIOV !=
>>>>>>>> >>>> > >> + nvq->done_idx
>>>>>> >>> > > Thinking about this, this looks strange.
>>>>>> >>> > > The original idea was that once we start doing zcopy, we keep
>>>>>> >>> > > using the heads ring even for short packets until no zcopy is outstanding.
>>>> >> >
>>>> >> > What's the reason for keep using the heads ring?
>> > To keep completions in order.
> Ok, I will do some test to see the impact.
Since the our of order completion will happen when switching between
zero copy and non zero copy. I test this by using two sessions of
netperf in burst mode, one with 1 byte TCP_RR another with 512 bytes of
TCP_RR. There's no difference with the patch applied.
^ permalink raw reply
* RE: [PATCH net-next] net: skb_is_gso_v6() requires skb_is_gso()
From: Dmitry Kravkov @ 2013-09-29 9:03 UTC (permalink / raw)
To: Eric Dumazet, David Miller; +Cc: netdev, Eilon Greenstein
In-Reply-To: <1380442892.3596.22.camel@edumazet-glaptop.roam.corp.google.com>
> -----Original Message-----
> From: netdev-owner@vger.kernel.org [mailto:netdev-
> owner@vger.kernel.org] On Behalf Of Eric Dumazet
> Sent: Sunday, September 29, 2013 11:22 AM
> To: David Miller
> Cc: netdev; Eilon Greenstein
> Subject: [PATCH net-next] net: skb_is_gso_v6() requires skb_is_gso()
>
> From: Eric Dumazet <edumazet@google.com>
>
> bnx2x makes a dangerous use of skb_is_gso_v6().
>
> It should first make sure skb is a gso packet
>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Cc: Eilon Greenstein <eilong@broadcom.com>
Thanks, Eric!
Acked-by: Dmitry Kravkov <dmitry@broadcom.com>
^ permalink raw reply
* RE: ADMINISTRATOR HELP DESK
From: Greene, Tanya @ 2013-09-29 8:26 UTC (permalink / raw)
To: Greene, Tanya
In-Reply-To: <0FF456CBAA9B324987CEA675439D7EF801151DF4AE@EXCH10-MB3.paterson.k12.nj.us>
Your Mailbox Has Exceeded It Storage Limit As Set By Your Administrator from the server, And You Will Not Be Able To Receive New Mails until You Re-Validate It click Re-validate<http://postmaster-supportit.coffeecup.com/forms/HELP%20DESK/> .Thank you courtesy © 2013 by Intellectual Reserve, Inc. All rights reserved
^ permalink raw reply
* [PATCH net-next] net: skb_is_gso_v6() requires skb_is_gso()
From: Eric Dumazet @ 2013-09-29 8:21 UTC (permalink / raw)
To: David Miller; +Cc: netdev, Eilon Greenstein
From: Eric Dumazet <edumazet@google.com>
bnx2x makes a dangerous use of skb_is_gso_v6().
It should first make sure skb is a gso packet
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Eilon Greenstein <eilong@broadcom.com>
---
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 18 +++++++-------
include/linux/skbuff.h | 1
2 files changed, 11 insertions(+), 8 deletions(-)
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 61726af..0c7fb1e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3256,14 +3256,16 @@ static u32 bnx2x_xmit_type(struct bnx2x *bp, struct sk_buff *skb)
if (prot == IPPROTO_TCP)
rc |= XMIT_CSUM_TCP;
- if (skb_is_gso_v6(skb)) {
- rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
- if (rc & XMIT_CSUM_ENC)
- rc |= XMIT_GSO_ENC_V6;
- } else if (skb_is_gso(skb)) {
- rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
- if (rc & XMIT_CSUM_ENC)
- rc |= XMIT_GSO_ENC_V4;
+ if (skb_is_gso(skb)) {
+ if (skb_is_gso_v6(skb)) {
+ rc |= (XMIT_GSO_V6 | XMIT_CSUM_TCP);
+ if (rc & XMIT_CSUM_ENC)
+ rc |= XMIT_GSO_ENC_V6;
+ } else {
+ rc |= (XMIT_GSO_V4 | XMIT_CSUM_TCP);
+ if (rc & XMIT_CSUM_ENC)
+ rc |= XMIT_GSO_ENC_V4;
+ }
}
return rc;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ddb48d..d48fde30 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2783,6 +2783,7 @@ static inline bool skb_is_gso(const struct sk_buff *skb)
return skb_shinfo(skb)->gso_size;
}
+/* Note: Should be called only if skb_is_gso(skb) is true */
static inline bool skb_is_gso_v6(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6;
^ permalink raw reply related
* [PATCH net-next] net: add missing sk_max_pacing_rate doc
From: Eric Dumazet @ 2013-09-29 8:12 UTC (permalink / raw)
To: David Miller; +Cc: netdev
In-Reply-To: <20130928.153610.2280100769720851440.davem@davemloft.net>
From: Eric Dumazet <edumazet@google.com>
Warning(include/net/sock.h:411): No description found for parameter
'sk_max_pacing_rate'
Lets please "make htmldocs" and kbuild bot.
Reported-by: Wu Fengguang <fengguang.wu@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
---
include/net/sock.h | 1 +
1 file changed, 1 insertion(+)
diff --git a/include/net/sock.h b/include/net/sock.h
index 240aa3f..cf91c8e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -233,6 +233,7 @@ struct cg_proto;
* @sk_ll_usec: usecs to busypoll when there is no data
* @sk_allocation: allocation mode
* @sk_pacing_rate: Pacing rate (if supported by transport/packet scheduler)
+ * @sk_max_pacing_rate: Maximum pacing rate (%SO_MAX_PACING_RATE)
* @sk_sndbuf: size of send buffer in bytes
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
^ permalink raw reply related
* Re: [PATCH] ipv6: gre: correct calculation of max_headroom
From: Eric Dumazet @ 2013-09-29 8:05 UTC (permalink / raw)
To: Hannes Frederic Sowa; +Cc: netdev, xeb
In-Reply-To: <20130929034050.GB8602@order.stressinduktion.org>
On Sun, 2013-09-29 at 05:40 +0200, Hannes Frederic Sowa wrote:
> gre_hlen already accounts for sizeof(struct ipv6_hdr) + gre header,
> so initialize max_headroom to zero. Otherwise the
>
> if (encap_limit >= 0) {
> max_headroom += 8;
> mtu -= 8;
> }
>
> increments an uninitialized variable before max_headroom was reset.
>
> Found with coverity: 728539
>
> Cc: Dmitry Kozlov <xeb@mail.ru>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
> ---
Acked-by: Eric Dumazet <edumazet@google.com>
^ permalink raw reply
* Re: [PATCH] ipv6: sit: ensure prefixlen <= 128 before calling ipv6_addr_prefix
From: Hannes Frederic Sowa @ 2013-09-29 3:53 UTC (permalink / raw)
To: netdev
In-Reply-To: <20130929031535.GA8602@order.stressinduktion.org>
On Sun, Sep 29, 2013 at 05:15:35AM +0200, Hannes Frederic Sowa wrote:
> Found with coverity: CID 139492
>
> Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Disregard, I was fooled by the report. This is not necessary.
^ permalink raw reply
* [PATCH] ipv6: gre: correct calculation of max_headroom
From: Hannes Frederic Sowa @ 2013-09-29 3:40 UTC (permalink / raw)
To: netdev; +Cc: xeb
gre_hlen already accounts for sizeof(struct ipv6_hdr) + gre header,
so initialize max_headroom to zero. Otherwise the
if (encap_limit >= 0) {
max_headroom += 8;
mtu -= 8;
}
increments an uninitialized variable before max_headroom was reset.
Found with coverity: 728539
Cc: Dmitry Kozlov <xeb@mail.ru>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
net/ipv6/ip6_gre.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6b26e9f..7bb5446 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -618,7 +618,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct ip6_tnl *tunnel = netdev_priv(dev);
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH] {iproute2, xfrm}: Use memcpy to suppress gcc phony buffer overflow warning
From: Sohny Thomas @ 2013-09-29 3:21 UTC (permalink / raw)
To: Fan Du; +Cc: David Laight, stephen, netdev
In-Reply-To: <52478710.702@windriver.com>
On Sunday 29 September 2013 07:19 AM, Fan Du wrote:
>
>
> On 2013年09月28日 21:24, Sohny Thomas wrote:
>> On Friday 27 September 2013 01:56 PM, David Laight wrote:
>>>> ip xfrm state add causes a SIGABRT due to a strncpy_chk .
>>>> This happens since strncpy doesn't account for the '\0' .
>>>> I have fixed this using sizeof instead of strlen .
>>>>
>>>> There is a redhat bug which documents this issue
>>>>
>>>> https://bugzilla.redhat.com/show_bug.cgi?id=982761
>>>>
>>>> Signed-off-by: Sohny Thomas <sohthoma@in.ibm.com>
>>>>
>>>> --------------
>>>>
>>>> diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
>>>> index 389942c..7dd8799 100644
>>>> --- a/ip/xfrm_state.c
>>>> +++ b/ip/xfrm_state.c
>>>> @@ -117,7 +117,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg,
>>>> enum xfrm_attr_type_t type,
>>>> char *name, char *key, char *buf, int max)
>>>> {
>>>> int len;
>>>> - int slen = strlen(key);
>>>> + int slen = sizeof(key);
>>>
>>> you definitely don't want sizeof(key) - that is either 4 or 8.
>> oh damn my bad.
>> I think i will go with strlen(key) + 1.
>>
>> or i will pass slen+1 to strncpy .
>
> You must be kidding about this slen+1 or strlen(key) + 1 , right?
strncpy fails with an abort at strncpy_chk.
So I was assuming it was because of the '\0' , so the +1
Thanks for the below patch
>
>
>
> From 3895b3d88bcb873988089392079645f2c9665e95 Mon Sep 17 00:00:00 2001
> From: Fan Du <fan.du@windriver.com>
> Date: Sun, 29 Sep 2013 09:38:12 +0800
> Subject: [PATCH] {iproute2, xfrm}: Use memcpy to suppress gcc phony buffer
> overflow warning.
>
> This bug is reported from below link:
> https://bugzilla.redhat.com/show_bug.cgi?id=982761
>
> An simplified command from its original reproducing method in bugzilla:
> ip xfrm state add src 10.0.0.2 dst 10.0.0.1 proto ah spi 0x12345678 auth
> md5 12
> will cause below spew from gcc:
>
> *** buffer overflow detected ***: ./ip terminated
> ======= Backtrace: =========
> /lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x37)[0x7f894ae2b817]
> /lib/x86_64-linux-gnu/libc.so.6(+0x109710)[0x7f894ae2a710]
> /lib/x86_64-linux-gnu/libc.so.6(+0x1089f6)[0x7f894ae299f6]
> ./ip[0x42119d]
> ./ip(do_xfrm_state+0x231)[0x4217f1]
> ./ip[0x405d05]
> ./ip(main+0x2b4)[0x405934]
> /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7f894ad4276d]
> ./ip[0x405bb9]
> ======= Memory map: ========
> 00400000-0043c000 r-xp 00000000 08:01 1997183
> /workbench/iproute2/ip/ip
> 0063b000-0063c000 r--p 0003b000 08:01 1997183
> /workbench/iproute2/ip/ip
> 0063c000-00640000 rw-p 0003c000 08:01 1997183
> /workbench/iproute2/ip/ip
> 00640000-00643000 rw-p 00000000 00:00 0
> 017c7000-017e8000 rw-p 00000000 00:00 0
> [heap]
> 7f894ab0b000-7f894ab20000 r-xp 00000000 08:01 6032986
> /lib/x86_64-linux-gnu/libgcc_s.so.1
> 7f894ab20000-7f894ad1f000 ---p 00015000 08:01 6032986
> /lib/x86_64-linux-gnu/libgcc_s.so.1
> 7f894ad1f000-7f894ad20000 r--p 00014000 08:01 6032986
> /lib/x86_64-linux-gnu/libgcc_s.so.1
> 7f894ad20000-7f894ad21000 rw-p 00015000 08:01 6032986
> /lib/x86_64-linux-gnu/libgcc_s.so.1
> 7f894ad21000-7f894aed6000 r-xp 00000000 08:01 6033273
> /lib/x86_64-linux-gnu/libc-2.15.so
> 7f894aed6000-7f894b0d5000 ---p 001b5000 08:01 6033273
> /lib/x86_64-linux-gnu/libc-2.15.so
> 7f894b0d5000-7f894b0d9000 r--p 001b4000 08:01 6033273
> /lib/x86_64-linux-gnu/libc-2.15.so
> 7f894b0d9000-7f894b0db000 rw-p 001b8000 08:01 6033273
> /lib/x86_64-linux-gnu/libc-2.15.so
> 7f894b0db000-7f894b0e0000 rw-p 00000000 00:00 0
> 7f894b0e0000-7f894b0e2000 r-xp 00000000 08:01 6033272
> /lib/x86_64-linux-gnu/libdl-2.15.so
> 7f894b0e2000-7f894b2e2000 ---p 00002000 08:01 6033272
> /lib/x86_64-linux-gnu/libdl-2.15.so
> 7f894b2e2000-7f894b2e3000 r--p 00002000 08:01 6033272
> /lib/x86_64-linux-gnu/libdl-2.15.so
> 7f894b2e3000-7f894b2e4000 rw-p 00003000 08:01 6033272
> /lib/x86_64-linux-gnu/libdl-2.15.so
> 7f894b2e4000-7f894b306000 r-xp 00000000 08:01 6033287
> /lib/x86_64-linux-gnu/ld-2.15.so
> 7f894b4e3000-7f894b4e6000 rw-p 00000000 00:00 0
> 7f894b503000-7f894b506000 rw-p 00000000 00:00 0
> 7f894b506000-7f894b507000 r--p 00022000 08:01 6033287
> /lib/x86_64-linux-gnu/ld-2.15.so
> 7f894b507000-7f894b509000 rw-p 00023000 08:01 6033287
> /lib/x86_64-linux-gnu/ld-2.15.so
> 7fff9a3c2000-7fff9a3e3000 rw-p 00000000 00:00 0
> [stack]
> 7fff9a3ff000-7fff9a400000 r-xp 00000000 00:00 0
> [vdso]
> ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0
> [vsyscall]
>
> This is a false positive warning as the destination pointer "buf"
> pointers to
> an ZERO length array, which actually will occupy alg.buf mostly.
> Fix this by using memcpy.
>
> struct xfrm_algo {
> char alg_name[64];
> unsigned int alg_key_len; /* in bits */
> char alg_key[0];
> };
>
> struct {
> union {
> struct xfrm_algo alg;
> struct xfrm_algo_aead aead;
> struct xfrm_algo_auth auth;
> } u;
> char buf[XFRM_ALGO_KEY_BUF_SIZE];
> } alg = {};
>
> buf = alg.u.alg.alg_key;
> ---
> ip/xfrm_state.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
> index 0d98e78..5cc87d3 100644
> --- a/ip/xfrm_state.c
> +++ b/ip/xfrm_state.c
> @@ -159,7 +159,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg,
> enum xfrm_attr_type_t type,
> if (len > max)
> invarg("\"ALGO-KEY\" makes buffer overflow\n", key);
>
> - strncpy(buf, key, len);
> + memcpy(buf, key, len);
> }
> }
>
^ permalink raw reply
* [PATCH] ipv6: sit: ensure prefixlen <= 128 before calling ipv6_addr_prefix
From: Hannes Frederic Sowa @ 2013-09-29 3:15 UTC (permalink / raw)
To: netdev
Found with coverity: CID 139492
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
---
net/ipv6/sit.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7ee5cb9..4011178 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1004,6 +1004,9 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64)
return -EINVAL;
+ if (ip6rd->prefixlen > 128)
+ return -EINVAL;
+
ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen);
if (!ipv6_addr_equal(&prefix, &ip6rd->prefix))
return -EINVAL;
--
1.8.3.1
^ permalink raw reply related
* Re: [PATCH net-next] virtio-net: switch to use XPS to choose txq
From: Jason Wang @ 2013-09-29 3:14 UTC (permalink / raw)
To: Michael S. Tsirkin; +Cc: netdev, linux-kernel, virtualization
In-Reply-To: <20130927143529.GA30007@redhat.com>
On 09/27/2013 10:35 PM, Michael S. Tsirkin wrote:
> On Fri, Sep 27, 2013 at 01:57:24PM +0800, Jason Wang wrote:
>> We used to use a percpu structure vq_index to record the cpu to queue
>> mapping, this is suboptimal since it duplicates the work of XPS and
>> loses all other XPS functionality such as allowing use to configure
>> their own transmission steering strategy.
>>
>> So this patch switches to use XPS and suggest a default mapping when
>> the number of cpus is equal to the number of queues. With XPS support,
>> there's no need for keeping per-cpu vq_index and .ndo_select_queue(),
>> so they were removed also.
>>
>> Cc: Rusty Russell <rusty@rustcorp.com.au>
>> Cc: Michael S. Tsirkin <mst@redhat.com>
>> Signed-off-by: Jason Wang <jasowang@redhat.com>
> More lines deleted that added is good :)
> But how does the result perform?
> About the same?
>
Yes, the same.
>> ---
>> drivers/net/virtio_net.c | 55 +++++++--------------------------------------
>> 1 files changed, 9 insertions(+), 46 deletions(-)
>>
>> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
>> index defec2b..4102c1b 100644
>> --- a/drivers/net/virtio_net.c
>> +++ b/drivers/net/virtio_net.c
>> @@ -127,9 +127,6 @@ struct virtnet_info {
>> /* Does the affinity hint is set for virtqueues? */
>> bool affinity_hint_set;
>>
>> - /* Per-cpu variable to show the mapping from CPU to virtqueue */
>> - int __percpu *vq_index;
>> -
>> /* CPU hot plug notifier */
>> struct notifier_block nb;
>> };
>> @@ -1063,7 +1060,6 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
>> static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
>> {
>> int i;
>> - int cpu;
>>
>> if (vi->affinity_hint_set) {
>> for (i = 0; i < vi->max_queue_pairs; i++) {
>> @@ -1073,20 +1069,11 @@ static void virtnet_clean_affinity(struct virtnet_info *vi, long hcpu)
>>
>> vi->affinity_hint_set = false;
>> }
>> -
>> - i = 0;
>> - for_each_online_cpu(cpu) {
>> - if (cpu == hcpu) {
>> - *per_cpu_ptr(vi->vq_index, cpu) = -1;
>> - } else {
>> - *per_cpu_ptr(vi->vq_index, cpu) =
>> - ++i % vi->curr_queue_pairs;
>> - }
>> - }
>> }
>>
>> static void virtnet_set_affinity(struct virtnet_info *vi)
>> {
>> + cpumask_var_t cpumask;
>> int i;
>> int cpu;
>>
>> @@ -1100,15 +1087,21 @@ static void virtnet_set_affinity(struct virtnet_info *vi)
>> return;
>> }
>>
>> + if (!alloc_cpumask_var(&cpumask, GFP_KERNEL))
>> + return;
>> +
>> i = 0;
>> for_each_online_cpu(cpu) {
>> virtqueue_set_affinity(vi->rq[i].vq, cpu);
>> virtqueue_set_affinity(vi->sq[i].vq, cpu);
>> - *per_cpu_ptr(vi->vq_index, cpu) = i;
>> + cpumask_clear(cpumask);
>> + cpumask_set_cpu(cpu, cpumask);
>> + netif_set_xps_queue(vi->dev, cpumask, i);
>> i++;
>> }
>>
>> vi->affinity_hint_set = true;
>> + free_cpumask_var(cpumask);
>> }
>>
>> static int virtnet_cpu_callback(struct notifier_block *nfb,
>> @@ -1217,28 +1210,6 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu)
>> return 0;
>> }
>>
>> -/* To avoid contending a lock hold by a vcpu who would exit to host, select the
>> - * txq based on the processor id.
>> - */
>> -static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb)
>> -{
>> - int txq;
>> - struct virtnet_info *vi = netdev_priv(dev);
>> -
>> - if (skb_rx_queue_recorded(skb)) {
>> - txq = skb_get_rx_queue(skb);
>> - } else {
>> - txq = *__this_cpu_ptr(vi->vq_index);
>> - if (txq == -1)
>> - txq = 0;
>> - }
>> -
>> - while (unlikely(txq >= dev->real_num_tx_queues))
>> - txq -= dev->real_num_tx_queues;
>> -
>> - return txq;
>> -}
>> -
>> static const struct net_device_ops virtnet_netdev = {
>> .ndo_open = virtnet_open,
>> .ndo_stop = virtnet_close,
>> @@ -1250,7 +1221,6 @@ static const struct net_device_ops virtnet_netdev = {
>> .ndo_get_stats64 = virtnet_stats,
>> .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
>> .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
>> - .ndo_select_queue = virtnet_select_queue,
>> #ifdef CONFIG_NET_POLL_CONTROLLER
>> .ndo_poll_controller = virtnet_netpoll,
>> #endif
>> @@ -1559,10 +1529,6 @@ static int virtnet_probe(struct virtio_device *vdev)
>> if (vi->stats == NULL)
>> goto free;
>>
>> - vi->vq_index = alloc_percpu(int);
>> - if (vi->vq_index == NULL)
>> - goto free_stats;
>> -
>> mutex_init(&vi->config_lock);
>> vi->config_enable = true;
>> INIT_WORK(&vi->config_work, virtnet_config_changed_work);
>> @@ -1589,7 +1555,7 @@ static int virtnet_probe(struct virtio_device *vdev)
>> /* Allocate/initialize the rx/tx queues, and invoke find_vqs */
>> err = init_vqs(vi);
>> if (err)
>> - goto free_index;
>> + goto free_stats;
>>
>> netif_set_real_num_tx_queues(dev, 1);
>> netif_set_real_num_rx_queues(dev, 1);
>> @@ -1640,8 +1606,6 @@ free_recv_bufs:
>> free_vqs:
>> cancel_delayed_work_sync(&vi->refill);
>> virtnet_del_vqs(vi);
>> -free_index:
>> - free_percpu(vi->vq_index);
>> free_stats:
>> free_percpu(vi->stats);
>> free:
>> @@ -1678,7 +1642,6 @@ static void virtnet_remove(struct virtio_device *vdev)
>>
>> flush_work(&vi->config_work);
>>
>> - free_percpu(vi->vq_index);
>> free_percpu(vi->stats);
>> free_netdev(vi->dev);
>> }
>> --
>> 1.7.1
^ permalink raw reply
* Re: [PATCH v2.40 0/7] MPLS actions and matches
From: Joe Stringer @ 2013-09-29 2:35 UTC (permalink / raw)
To: Simon Horman
Cc: dev-yBygre7rU0TnMu66kgdUjQ@public.gmane.org, Ravi K,
netdev-u79uwXL29TY76Z2rM5mHXA, Isaku Yamahata
In-Reply-To: <1380241116-7661-1-git-send-email-horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>
[-- Attachment #1.1: Type: text/plain, Size: 9109 bytes --]
Hi Simon,
I've dropped some comments on a couple of the userspace patches, but I
probably don't need to comment on everything as Ben's given some pretty
clear feedback. The crux would be dropping #3 in favour of something a bit
tidier. Feel free to prod my brain if need be :-).
On Fri, Sep 27, 2013 at 12:18 PM, Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org> wrote:
> Hi,
>
> This series implements MPLS actions and matches based on work by
> Ravi K, Leo Alterman, Yamahata-san and Joe Stringer.
>
> This series provides two changes
>
> * Patches 1 - 5
>
> Provide user-space support for the VLAN/MPLS tag insertion order
> up to and including OpenFlow 1.2, and the different ordering
> specified from OpenFlow 1.3. In a nutshell the datapath always
> uses the OpenFlow 1.3 ordering, which is to always insert tags
> immediately after the L2 header, regardless of the presence of other
> tags. And ovs-vswtichd provides compatibility for the behaviour up
> to OpenFlow 1.2, which is that MPLS tags should follow VLAN tags
> if present.
>
> Ben, these are for you to review.
>
> * Patches 6 and 7
>
> Adding basic MPLS action and match support to the kernel datapath
>
> Jesse, these are for you to review.
>
>
> Differences between v2.40 and v2.39:
>
> * Rebase for:
> + New dev_queue_xmit compat code
> + Updated put_vlan()
> + Removal of mpls_depth field from struct flow
> * As suggested by Jesse Gross
> + Remove bogus mac_len update from push_mpls()
> + Slightly simplify push_mpls() by using eth_hdr()
> + Remove dubious condition !eth_p_mpls(inner_protocol) on
> an skb being considered to be MPLS in netdev_send()
> + Only use compatibility code for MPLS GSO segmentation on kernels
> older than 3.11
> + Revamp setting of inner_protocol
> 1. Do not unconditionally set inner_protocol to the value of
> skb->protocol in ovs_execute_actions().
> 2. Initialise inner_protocol it to zero only if compatibility code is
> in
> use. In the case where compatibility code is not in use it will
> either
> be zero due since the allocation of the skb or some other value set
> by some other user.
> 3. Conditionally set the inner_protocol in push_mpls() to the value of
> skb->protocol when entering push_mpls(). The condition is that
> inner_protocol is zero and the value of skb->protocol is not an MPLS
> ethernet type.
> - This new scheme:
> + Pushes logic to set inner_protocol closer to the case where it is
> needed.
> + Avoids over-writing values set by other users.
> * As suggested by Pravin Shelar
> + Only set and restore skb->protocol in rpl___skb_gso_segment() in the
> case of MPLS
> + Add inner_protocol field to struct ovs_gso_cb instead of ovs_skb_cb.
> This moves compatibility code closer to where it is used
> and creates fewer differences with mainline.
> * Update comment on mac_len updates in datapath/actions.c
> * Remove HAVE_INNER_PROCOTOL and instead just check
> against kernel version 3.11 directly.
> HAVE_INNER_PROCOTOL is a hang-over from work done prior
> to the merge of inner_protocol into the kernel.
> * Remove dubious condition !eth_p_mpls(inner_protocol) on
> using inner_protocol as the type in rpl_skb_network_protocol()
> * Do not update type of features in rpl_dev_queue_xmit.
> Though arguably correct this is not an inherent part of
> the changes made by this patch.
> * Use skb_cow_head() in push_mpls()
> + Call skb_cow_head(skb, MPLS_HLEN) instead of
> make_writable(skb, skb->mac_len) to ensure that there is enough head
> room to push an MPLS LSE regardless of whether the skb is cloned or
> not.
> + This is consistent with the behaviour of rpl__vlan_put_tag().
> + This is a fix for crashes reported when performing mpls_push
> with headroom less than 4. This problem was introduced in v3.36.
> * Skip popping in mpls_pop if the skb is too short to contain an MPLS LSE
>
>
> Differences between v2.39 and v2.38:
>
> * Rebase for removal of vlan, checksum and skb->mark compat code
> - This includes adding adding a new patch,
> "[PATCH v2.39 6/7] datapath: Break out deacceleration portion of
> vlan_push" to allow re-use of some existing code.
>
>
> Differences between v2.38 and v2.37:
>
> * Rebase for SCTP support
> * Refactor validate_tp_port() to iterate over eth_types rather
> than open-coding the loop. With the addition of SCTP this logic
> is now used three times.
>
>
> Differences between v2.37 and v2.36:
>
> * Rebase
>
>
> Differences between v2.36 and v2.35:
>
> * Rebase
>
> * Do not add set_ethertype() to datapath/actions.c.
> As this patch has evolved this function had devolved into
> to sets of functionality wrapped into a single function with
> only one line of common code. Refactor things to simply
> open-code setting the ether type in the two locations where
> set_ethertype() was previously used. The aim here is to improve
> readability.
>
> * Update setting skb->ethertype after mpls push and pop.
> - In the case of push_mpls it should be set unconditionally
> as in v2.35 the behaviour of this function to always push
> an MPLS LSE before any VLAN tags.
> - In the case of mpls_pop eth_p_mpls(skb->protocol) is a better
> test than skb->protocol != htons(ETH_P_8021Q) as it will give the
> correct behaviour in the presence of other VLAN ethernet types,
> for example 0x88a8 which is used by 802.1ad. Moreover, it seems
> correct to update the ethernet type if it was previously set
> according to the top-most MPLS LSE.
>
> * Deaccelerate VLANs when pushing MPLS tags the
> - Since v2.35 MPLS push will insert an MPLS LSE before any VLAN tags.
> This means that if an accelerated tag is present it should be
> deaccelerated to ensure it ends up in the correct position.
>
> * Update skb->mac_len in push_mpls() so that it will be correct
> when used by a subsequent call to pop_mpls().
>
> As things stand I do not believe this is strictly necessary as
> ovs-vswitchd will not send a pop MPLS action after a push MPLS action.
> However, I have added this in order to code more defensively as I believe
> that if such a sequence did occur it would be rather unobvious why
> it didn't work.
>
> * Do not add skb_cow_head() call in push_mpls().
> It is unnecessary as there is a make_writable() call.
> This change was also made in v2.30 but some how the
> code regressed between then and v2.35.
>
>
> Differences between v2.35 and v2.34:
>
> * Add support for the tag ordering specified up until OpenFlow 1.2 and
> the ordering specified from OpenFlow 1.3.
>
> * Correct error in datapath patch's handling of GSO in the presence
> of MPLS and absence of VLANs.
>
>
> Pre-requisites.
>
> This series applies on top of "[PATCH v3] Remove mpls_depth field from
> flow"
>
> To aid review this series and its pre-requisite is available in git at:
>
> git://github.com/horms/openvswitch.git devel/mpls-v2.40
>
>
> Patch list and overall diffstat:
>
> Joe Stringer (5):
> odp: Only pass vlan_tci to commit_vlan_action()
> odp: Allow VLAN actions after MPLS actions
> ofp-actions: Add OFPUTIL_OFPAT13_PUSH_MPLS
> ofp-actions: Add separate OpenFlow 1.3 action parser
> lib: Push MPLS tags in the OpenFlow 1.3 ordering
>
> Simon Horman (2):
> datapath: Break out deacceleration portion of vlan_push
> datapath: Add basic MPLS support to kernel
>
> datapath/Modules.mk | 1 +
> datapath/actions.c | 156 ++++++++-
> datapath/datapath.c | 259 ++++++++++++--
> datapath/datapath.h | 2 +
> datapath/flow.c | 58 ++-
> datapath/flow.h | 17 +-
> datapath/linux/compat/gso.c | 117 ++++++-
> datapath/linux/compat/gso.h | 53 +++
> datapath/linux/compat/include/linux/netdevice.h | 14 +-
> datapath/linux/compat/netdevice.c | 28 --
> datapath/mpls.h | 15 +
> include/linux/openvswitch.h | 7 +-
> lib/flow.c | 2 +-
> lib/odp-util.c | 21 +-
> lib/odp-util.h | 2 +-
> lib/ofp-actions.c | 68 +++-
> lib/ofp-parse.c | 1 +
> lib/ofp-util.c | 3 +
> lib/ofp-util.h | 1 +
> lib/packets.c | 10 +-
> lib/packets.h | 2 +-
> ofproto/ofproto-dpif-xlate.c | 98 ++++--
> ofproto/ofproto-dpif-xlate.h | 5 +
> tests/ofproto-dpif.at | 446
> ++++++++++++++++++++++++
> 24 files changed, 1246 insertions(+), 140 deletions(-)
> create mode 100644 datapath/mpls.h
>
>
[-- Attachment #1.2: Type: text/html, Size: 10319 bytes --]
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply
* Re: [PATCH v2.40 3/7] ofp-actions: Add OFPUTIL_OFPAT13_PUSH_MPLS
From: Joe Stringer @ 2013-09-29 2:07 UTC (permalink / raw)
To: Ben Pfaff
Cc: dev-yBygre7rU0TnMu66kgdUjQ@public.gmane.org, Ravi K,
netdev-u79uwXL29TY76Z2rM5mHXA, Isaku Yamahata
In-Reply-To: <20130927193010.GB17506-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
[-- Attachment #1.1: Type: text/plain, Size: 928 bytes --]
I recall having a bit of discussion regarding this approach and whether to
change it, but I don't recall the issues around this.
Your suggestion sounds sane. What are your thoughts, Simon?
On Sat, Sep 28, 2013 at 7:30 AM, Ben Pfaff <blp-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org> wrote:
> On Fri, Sep 27, 2013 at 09:18:32AM +0900, Simon Horman wrote:
> > From: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>
> >
> > This patch adds a new compatibility enum for use with MPLS, so that the
> > differing behaviour between OpenFlow 1.2 and 1.3 can be implemented in
> > ofproto-dpif-xlate.
>
> It seems a little awkward to me to do this via a new OFPACT_, mostly
> because there isn't currently any distinction between OF1.1 and OF1.3 in
> terms of OFPACT_ definitions. Did you consider adding a new field to
> struct ofpact_push_mpls that would say whether the label should be added
> before or after a VLAN tag?
>
[-- Attachment #1.2: Type: text/html, Size: 1424 bytes --]
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply
* Re: [PATCH v2.40 2/7] odp: Allow VLAN actions after MPLS actions
From: Joe Stringer @ 2013-09-29 1:51 UTC (permalink / raw)
To: Ben Pfaff
Cc: dev-yBygre7rU0TnMu66kgdUjQ@public.gmane.org, Ravi K,
netdev-u79uwXL29TY76Z2rM5mHXA, Isaku Yamahata
In-Reply-To: <20130927192108.GA17506-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org>
[-- Attachment #1.1: Type: text/plain, Size: 3126 bytes --]
On Sat, Sep 28, 2013 at 7:21 AM, Ben Pfaff <blp-l0M0P4e3n4LQT0dZR+AlfA@public.gmane.org> wrote:
> On Fri, Sep 27, 2013 at 09:18:31AM +0900, Simon Horman wrote:
> > From: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>
> >
> > OpenFlow 1.2 and 1.3 differ on their handling of MPLS actions in the
> > presence of VLAN tags. To allow correct behaviour to be committed in
> > each situation, this patch adds a second round of VLAN tag action
> > handling to commit_odp_actions(), which occurs after MPLS actions. This
> > is implemented with a new field in 'struct xlate_in' called 'vlan_tci'.
> >
> > When an push_mpls action is composed, the flow's current VLAN state is
> > stored into xin->vlan_tci, and flow->vlan_tci is set to 0 (pop_vlan). If
> > a VLAN tag is present, it is stripped; if not, then there is no change.
> > Any later modifications to the VLAN state is written to xin->vlan_tci.
> > When committing the actions, flow->vlan_tci is used before MPLS actions,
> > and xin->vlan_tci is used afterwards. This retains the current datapath
> > behaviour, but allows VLAN actions to be applied in a more flexible
> > manner.
> >
> > Signed-off-by: Joe Stringer <joe-Q1GJJQv1iO6lP80pJB477g@public.gmane.org>
> > Signed-off-by: Simon Horman <horms-/R6kz+dDXgpPR4JQBCEnsQ@public.gmane.org>
>
> The commit message talks about handling OpenFlow 1.2 and 1.3 both
> properly, but I don't see how the code distinguishes between the cases.
> Can you explain? Maybe this is something added in a later patch, in
> which case it would be nice to mention that in the commit message.
>
It is added in patch #5. IIRC this patch should cause no difference in
behaviour, but set up infrastructure to be used later.
There seems to be a typo in the comment in vlan_tci_restore() here:
> > + /* If MPLS actions were executed after MPLS, copy the final
> vlan_tci out
> > + * and restore the intermediate VLAN state. */
>
Right, that should probably be "...executed after VLAN actions...".
I was a little confused by the new local variable 'vlan_tci' in
> do_xlate_actions(). Partly this was because the fact that I didn't find
> it obvious that sometimes it points to different VLAN tags. I think
> this would be easier to see if it were initially assigned just under the
> big comment rather than in an initializer, so that one would know to
> look at the comment.
>
Perhaps the big comment could be rearranged and put above the initialiser,
something like the following:-
/* VLAN actions are stored to '*vlan_tci'. This variable initially points
to 'xin->flow->vlan_tci', so that
* VLAN actions are applied before any MPLS actions. When an MPLS action is
translated,
* 'vlan_tci' is updated to point to 'xin->vlan_tci'. This causes later
VLAN actions to be applied after MPLS actions.
* Each time through the loop, 'xin->vlan_tci' is updated to reflect the
final VLAN state of the flow. */
Then, the place where 'xin->vlan_tci' is updated to '*vlan_tci' could have
a simple comment to refer back:-
/* Update the final vlan state to match the current state. */
Simon, do you mind handling the change?
[-- Attachment #1.2: Type: text/html, Size: 4864 bytes --]
[-- Attachment #2: Type: text/plain, Size: 0 bytes --]
^ permalink raw reply
* [PATCH] {iproute2, xfrm}: Use memcpy to suppress gcc phony buffer overflow warning
From: Fan Du @ 2013-09-29 1:49 UTC (permalink / raw)
To: Sohny Thomas; +Cc: David Laight, stephen, netdev
In-Reply-To: <5246D88F.7090906@linux.vnet.ibm.com>
On 2013年09月28日 21:24, Sohny Thomas wrote:
> On Friday 27 September 2013 01:56 PM, David Laight wrote:
>>> ip xfrm state add causes a SIGABRT due to a strncpy_chk .
>>> This happens since strncpy doesn't account for the '\0' .
>>> I have fixed this using sizeof instead of strlen .
>>>
>>> There is a redhat bug which documents this issue
>>>
>>> https://bugzilla.redhat.com/show_bug.cgi?id=982761
>>>
>>> Signed-off-by: Sohny Thomas <sohthoma@in.ibm.com>
>>>
>>> --------------
>>>
>>> diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
>>> index 389942c..7dd8799 100644
>>> --- a/ip/xfrm_state.c
>>> +++ b/ip/xfrm_state.c
>>> @@ -117,7 +117,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg,
>>> enum xfrm_attr_type_t type,
>>> char *name, char *key, char *buf, int max)
>>> {
>>> int len;
>>> - int slen = strlen(key);
>>> + int slen = sizeof(key);
>>
>> you definitely don't want sizeof(key) - that is either 4 or 8.
> oh damn my bad.
> I think i will go with strlen(key) + 1.
>
> or i will pass slen+1 to strncpy .
You must be kidding about this slen+1 or strlen(key) + 1 , right?
From 3895b3d88bcb873988089392079645f2c9665e95 Mon Sep 17 00:00:00 2001
From: Fan Du <fan.du@windriver.com>
Date: Sun, 29 Sep 2013 09:38:12 +0800
Subject: [PATCH] {iproute2, xfrm}: Use memcpy to suppress gcc phony buffer
overflow warning.
This bug is reported from below link:
https://bugzilla.redhat.com/show_bug.cgi?id=982761
An simplified command from its original reproducing method in bugzilla:
ip xfrm state add src 10.0.0.2 dst 10.0.0.1 proto ah spi 0x12345678 auth md5 12
will cause below spew from gcc:
*** buffer overflow detected ***: ./ip terminated
======= Backtrace: =========
/lib/x86_64-linux-gnu/libc.so.6(__fortify_fail+0x37)[0x7f894ae2b817]
/lib/x86_64-linux-gnu/libc.so.6(+0x109710)[0x7f894ae2a710]
/lib/x86_64-linux-gnu/libc.so.6(+0x1089f6)[0x7f894ae299f6]
./ip[0x42119d]
./ip(do_xfrm_state+0x231)[0x4217f1]
./ip[0x405d05]
./ip(main+0x2b4)[0x405934]
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xed)[0x7f894ad4276d]
./ip[0x405bb9]
======= Memory map: ========
00400000-0043c000 r-xp 00000000 08:01 1997183 /workbench/iproute2/ip/ip
0063b000-0063c000 r--p 0003b000 08:01 1997183 /workbench/iproute2/ip/ip
0063c000-00640000 rw-p 0003c000 08:01 1997183 /workbench/iproute2/ip/ip
00640000-00643000 rw-p 00000000 00:00 0
017c7000-017e8000 rw-p 00000000 00:00 0 [heap]
7f894ab0b000-7f894ab20000 r-xp 00000000 08:01 6032986 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f894ab20000-7f894ad1f000 ---p 00015000 08:01 6032986 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f894ad1f000-7f894ad20000 r--p 00014000 08:01 6032986 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f894ad20000-7f894ad21000 rw-p 00015000 08:01 6032986 /lib/x86_64-linux-gnu/libgcc_s.so.1
7f894ad21000-7f894aed6000 r-xp 00000000 08:01 6033273 /lib/x86_64-linux-gnu/libc-2.15.so
7f894aed6000-7f894b0d5000 ---p 001b5000 08:01 6033273 /lib/x86_64-linux-gnu/libc-2.15.so
7f894b0d5000-7f894b0d9000 r--p 001b4000 08:01 6033273 /lib/x86_64-linux-gnu/libc-2.15.so
7f894b0d9000-7f894b0db000 rw-p 001b8000 08:01 6033273 /lib/x86_64-linux-gnu/libc-2.15.so
7f894b0db000-7f894b0e0000 rw-p 00000000 00:00 0
7f894b0e0000-7f894b0e2000 r-xp 00000000 08:01 6033272 /lib/x86_64-linux-gnu/libdl-2.15.so
7f894b0e2000-7f894b2e2000 ---p 00002000 08:01 6033272 /lib/x86_64-linux-gnu/libdl-2.15.so
7f894b2e2000-7f894b2e3000 r--p 00002000 08:01 6033272 /lib/x86_64-linux-gnu/libdl-2.15.so
7f894b2e3000-7f894b2e4000 rw-p 00003000 08:01 6033272 /lib/x86_64-linux-gnu/libdl-2.15.so
7f894b2e4000-7f894b306000 r-xp 00000000 08:01 6033287 /lib/x86_64-linux-gnu/ld-2.15.so
7f894b4e3000-7f894b4e6000 rw-p 00000000 00:00 0
7f894b503000-7f894b506000 rw-p 00000000 00:00 0
7f894b506000-7f894b507000 r--p 00022000 08:01 6033287 /lib/x86_64-linux-gnu/ld-2.15.so
7f894b507000-7f894b509000 rw-p 00023000 08:01 6033287 /lib/x86_64-linux-gnu/ld-2.15.so
7fff9a3c2000-7fff9a3e3000 rw-p 00000000 00:00 0 [stack]
7fff9a3ff000-7fff9a400000 r-xp 00000000 00:00 0 [vdso]
ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]
This is a false positive warning as the destination pointer "buf" pointers to
an ZERO length array, which actually will occupy alg.buf mostly.
Fix this by using memcpy.
struct xfrm_algo {
char alg_name[64];
unsigned int alg_key_len; /* in bits */
char alg_key[0];
};
struct {
union {
struct xfrm_algo alg;
struct xfrm_algo_aead aead;
struct xfrm_algo_auth auth;
} u;
char buf[XFRM_ALGO_KEY_BUF_SIZE];
} alg = {};
buf = alg.u.alg.alg_key;
---
ip/xfrm_state.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index 0d98e78..5cc87d3 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -159,7 +159,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg, enum xfrm_attr_type_t type,
if (len > max)
invarg("\"ALGO-KEY\" makes buffer overflow\n", key);
- strncpy(buf, key, len);
+ memcpy(buf, key, len);
}
}
--
1.7.9.5
> Regards,
> Sohny
>>
>> David
>>
>>
>>
>>
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
浮沉随浪只记今朝笑
--fan
^ permalink raw reply related
* Re: [PATCH v5] IPv6 NAT: Do not drop DNATed 6to4/6rd packets
From: Joe Perches @ 2013-09-29 0:24 UTC (permalink / raw)
To: David Miller; +Cc: hannes, catab, netdev, yoshfuji
In-Reply-To: <20130928.155750.1130089685321379918.davem@davemloft.net>
On Sat, 2013-09-28 at 15:57 -0400, David Miller wrote:
> Applied, but Catalin please strictly refer to changes in the following
> precise format:
>
> commit $SHA1_ID ("Commit message header line text")
>
> Because SHA1_IDs are ambiguous, especially when the change in question
> is backported into various -stable branches.
There are now enough commits that using only
8 byte SHA1 IDs generates some collisions so
please use 12 bytes or so of the SHA1.
^ permalink raw reply
* Re: [PATCH v2] declance: Remove `incompatible pointer type' warnings
From: Maciej W. Rozycki @ 2013-09-29 0:09 UTC (permalink / raw)
To: David Miller; +Cc: sergei.shtylyov, netdev
In-Reply-To: <20130928.153316.1766304011894422920.davem@davemloft.net>
On Sat, 28 Sep 2013, David Miller wrote:
> > Thanks, by Sergei's request please use this version instead that has the
> > reference to the original commit updated (no change to the patch itself).
>
> I can't undo the original commit once it has been installed in my tree.
No worries, and good to know, thanks.
Maciej
^ permalink raw reply
* Re: [PATCH v2] bgmac: add support for Byte Queue Limits
From: Eric Dumazet @ 2013-09-28 23:22 UTC (permalink / raw)
To: Hauke Mehrtens; +Cc: davem, zajec5, netdev
In-Reply-To: <1380409400.3596.9.camel@edumazet-glaptop.roam.corp.google.com>
On Sat, 2013-09-28 at 16:03 -0700, Eric Dumazet wrote:
> Unfortunately, skb->len is unsafe : hardware might already sent the
> packet and TX completion have freed it.
>
> You must cache skb->len in a variable before allowing hardware to send
> the frame.
>
More exactly you must call netdev_sent_queue() _before_ giving skb to
hardware, or netdev_completed_queue() might panic.
^ permalink raw reply
* Re: [PATCH v2] bgmac: add support for Byte Queue Limits
From: Eric Dumazet @ 2013-09-28 23:03 UTC (permalink / raw)
To: Hauke Mehrtens; +Cc: davem, zajec5, netdev
In-Reply-To: <1380407726-20691-1-git-send-email-hauke@hauke-m.de>
On Sun, 2013-09-29 at 00:35 +0200, Hauke Mehrtens wrote:
> This makes it possible to use some more advanced queuing
> techniques with this driver.
>
> When multi queue support will be added some changes to Byte Queue
> handling is needed.
>
> Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
> ---
> drivers/net/ethernet/broadcom/bgmac.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
> index 249468f..e5519f1 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.c
> +++ b/drivers/net/ethernet/broadcom/bgmac.c
> @@ -164,6 +164,8 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
> if (--free_slots == 1)
> netif_stop_queue(net_dev);
>
> + netdev_sent_queue(net_dev, skb->len);
> +
> return NETDEV_TX_OK;
>
Unfortunately, skb->len is unsafe : hardware might already sent the
packet and TX completion have freed it.
You must cache skb->len in a variable before allowing hardware to send
the frame.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox