* [patch net-next] net: sched: fix skb->protocol use in case of accelerated vlan path
From: Jiri Pirko @ 2015-01-12 10:19 UTC (permalink / raw)
To: netdev; +Cc: davem, jhs
tc code implicitly considers skb->protocol even in case of accelerated
vlan paths and expects vlan protocol type here. However, on rx path,
if the vlan header was already stripped, skb->protocol contains value
of next header. Similar situation is on tx path.
So for skbs that use skb->vlan_tci for tagging, use skb->vlan_proto instead.
Reported-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
Note that this is present since vlan accel was introduced, pre-git times.
Please consider this for stable.
include/net/pkt_sched.h | 12 ++++++++++++
net/sched/act_csum.c | 2 +-
net/sched/cls_flow.c | 8 ++++----
net/sched/em_ipset.c | 2 +-
net/sched/em_meta.c | 2 +-
net/sched/sch_api.c | 2 +-
net/sched/sch_dsmark.c | 6 +++---
net/sched/sch_teql.c | 4 ++--
8 files changed, 25 insertions(+), 13 deletions(-)
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 27a3383..cd590f7 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -3,6 +3,7 @@
#include <linux/jiffies.h>
#include <linux/ktime.h>
+#include <linux/if_vlan.h>
#include <net/sch_generic.h>
struct qdisc_walker {
@@ -114,6 +115,17 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res);
+static inline __be16 tc_skb_protocol(struct sk_buff *skb)
+{
+ /* We need to take extra care in case the skb came via
+ * vlan accelerated path. In that case, use skb->vlan_proto
+ * as the original vlan header was already stripped.
+ */
+ if (vlan_tx_tag_present(skb))
+ return skb->vlan_proto;
+ return skb->protocol;
+}
+
/* Calculate maximal size of packet seen by hard_start_xmit
routine of this device.
*/
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index edbf40d..4cd5cf1 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb,
if (unlikely(action == TC_ACT_SHOT))
goto drop;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case cpu_to_be16(ETH_P_IP):
if (!tcf_csum_ipv4(skb, update_flags))
goto drop;
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 15d68f2..4614103 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
if (flow->dst)
return ntohl(flow->dst);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow)
@@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys
if (flow->ports)
return ntohs(flow->port16[1]);
- return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol;
+ return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
}
static u32 flow_get_iif(const struct sk_buff *skb)
@@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
case htons(ETH_P_IPV6):
@@ -156,7 +156,7 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow)
{
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
case htons(ETH_P_IPV6):
diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c
index 5b4a4ef..a3d79c8 100644
--- a/net/sched/em_ipset.c
+++ b/net/sched/em_ipset.c
@@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em,
struct net_device *dev, *indev = NULL;
int ret, network_offset;
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
acpar.family = NFPROTO_IPV4;
if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index c8f8c39..2159981 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -197,7 +197,7 @@ META_COLLECTOR(int_priority)
META_COLLECTOR(int_protocol)
{
/* Let userspace take care of the byte ordering */
- dst->value = skb->protocol;
+ dst->value = tc_skb_protocol(skb);
}
META_COLLECTOR(int_pkttype)
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e..243b7d1 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1807,7 +1807,7 @@ done:
int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
struct tcf_result *res)
{
- __be16 protocol = skb->protocol;
+ __be16 protocol = tc_skb_protocol(skb);
int err;
for (; tp; tp = rcu_dereference_bh(tp->next)) {
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 227114f..66700a6 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
if (p->set_tc_index) {
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
if (skb_cow_head(skb, sizeof(struct iphdr)))
goto drop;
@@ -289,7 +289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
index = skb->tc_index & (p->indices - 1);
pr_debug("index %d->%d\n", skb->tc_index, index);
- switch (skb->protocol) {
+ switch (tc_skb_protocol(skb)) {
case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
@@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
*/
if (p->mask[index] != 0xff || p->value[index])
pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb->protocol));
+ __func__, ntohs(tc_skb_protocol(skb)));
break;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6ada423..2ad0c40 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -249,8 +249,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
char haddr[MAX_ADDR_LEN];
neigh_ha_snapshot(haddr, n, dev);
- err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
- NULL, skb->len);
+ err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)),
+ haddr, NULL, skb->len);
if (err < 0)
err = -EINVAL;
--
1.9.3
^ permalink raw reply related
* Re: [PATCH v4 4/4] can: kvaser_usb: Retry first bulk transfer on -ETIMEDOUT
From: Marc Kleine-Budde @ 2015-01-12 10:25 UTC (permalink / raw)
To: Ahmed S. Darwish
Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
Greg Kroah-Hartman, Linux-USB, Linux-CAN, netdev, LKML
In-Reply-To: <20150112101407.GA9213@linux>
[-- Attachment #1: Type: text/plain, Size: 2413 bytes --]
On 01/12/2015 11:14 AM, Ahmed S. Darwish wrote:
> On Sun, Jan 11, 2015 at 09:51:10PM +0100, Marc Kleine-Budde wrote:
>> On 01/11/2015 09:45 PM, Ahmed S. Darwish wrote:
>>> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>>>
>>> (This is a draft patch, I'm not sure if this fixes the USB
>>> bug or only its psymptom. Feedback from the linux-usb folks
>>> is really appreciated.)
>>>
>>> When plugging the Kvaser USB/CAN dongle the first time, everything
>>> works as expected and all of the transfers from and to the USB
>>> device succeeds.
>>>
>>> Meanwhile, after unplugging the device and plugging it again, the
>>> first bulk transfer _always_ returns an -ETIMEDOUT. The following
>>> behaviour was observied:
>>>
>>> - Setting higher timeout values for the first bulk transfer never
>>> solved the issue.
>>>
>>> - Unloading, then loading, our kvaser_usb module in question
>>> __always__ solved the issue.
>>>
>>> - Checking first bulk transfer status, and retry the transfer
>>> again in case of an -ETIMEDOUT also __always__ solved the issue.
>>> This is what the patch below does.
>>>
>>> - In the testing done so far, this issue appears only on laptops
>>> but never on PCs (possibly power related?)
>>>
>>> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>>
>> Does this patch apply apply between 3 and 4? If not, please re-arrange
>> the series. As this is a bug fix, patches 1, 2 and 4 will go via
>> net/master, 3 will go via net-next/master.
>
> Since no one complained earlier, I guess this issue only affects
> USBCAN devices. That's why I've based it above patch #3: adding
> USBCAN hardware support.
>
> Nonetheless, it won't do any harm for the current Leaf-only
> driver. So _if_ this is the correct fix, I will update the commit
> log, refactor the check into a 'do { } while()' loop, and then
> base it above the Leaf-only net/master fixes on patch #1, and #2.
>
> Any feedback on the USB side of things?
Maybe you have to change the subject of this patch to be more visible on
the USB list and/or add the right USB people on Cc.
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply
* Re: [net-next PATCH 1/1] net: sched: Introduce connmark action
From: Jiri Pirko @ 2015-01-12 10:44 UTC (permalink / raw)
To: Jamal Hadi Salim; +Cc: davem, nbd, pablo, fw, netdev
In-Reply-To: <1420980786-24466-1-git-send-email-jhs@emojatatu.com>
Sun, Jan 11, 2015 at 01:53:06PM CET, jhs@mojatatu.com wrote:
>From: Felix Fietkau <nbd@openwrt.org>
>
>This tc action allows you to retrieve the connection tracking mark
>
>There are known limitations currently:
>
>doesn't work for inital packets, since we only query the ct table.
> Fine given use case is for returning packets
>no implicit defrag.
> frags should be rare so fix later and what is a frag between friends
>won't work for more complex tasks, e.g. lookup of other extensions
> since we have no means to store results
>we still have a 2nd lookup later on via normal conntrack path.
> This shouldn't break anything though since skb->nfct isn't altered.
>
>Signed-off-by: Felix Fietkau <nbd@openwrt.org>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/net/tc_act/tc_connmark.h | 14 ++
> include/uapi/linux/tc_act/tc_connmark.h | 22 ++++
> net/sched/Kconfig | 11 ++
> net/sched/Makefile | 1 +
> net/sched/act_connmark.c | 211 +++++++++++++++++++++++++++++++
> 5 files changed, 259 insertions(+)
> create mode 100644 include/net/tc_act/tc_connmark.h
> create mode 100644 include/uapi/linux/tc_act/tc_connmark.h
> create mode 100644 net/sched/act_connmark.c
>
>diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h
>new file mode 100644
>index 0000000..5c1104c
>--- /dev/null
>+++ b/include/net/tc_act/tc_connmark.h
>@@ -0,0 +1,14 @@
>+#ifndef __NET_TC_CONNMARK_H
>+#define __NET_TC_CONNMARK_H
>+
>+#include <net/act_api.h>
>+
>+struct tcf_connmark_info {
>+ struct tcf_common common;
>+ u16 zone;
>+};
>+
>+#define to_connmark(a) \
>+ container_of(a->priv, struct tcf_connmark_info, common)
>+
>+#endif /* __NET_TC_CONNMARK_H */
>diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h
>new file mode 100644
>index 0000000..82eda46
>--- /dev/null
>+++ b/include/uapi/linux/tc_act/tc_connmark.h
>@@ -0,0 +1,22 @@
>+#ifndef __UAPI_TC_CONNMARK_H
>+#define __UAPI_TC_CONNMARK_H
>+
>+#include <linux/types.h>
>+#include <linux/pkt_cls.h>
>+
>+#define TCA_ACT_CONNMARK 20
Although I think it makes no difference, this should probably be
increment of the last action number (vlan=12). I used 13 for bpf, so 14
here? Anyway what this number is here for?
>+
>+struct tc_connmark {
>+ tc_gen;
>+ __u16 zone;
>+};
>+
>+enum {
>+ TCA_CONNMARK_UNSPEC,
>+ TCA_CONNMARK_PARMS,
>+ TCA_CONNMARK_TM,
>+ __TCA_CONNMARK_MAX
>+};
>+#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1)
>+
>+#endif
>diff --git a/net/sched/Kconfig b/net/sched/Kconfig
>index c54c9d9..db20cae 100644
>--- a/net/sched/Kconfig
>+++ b/net/sched/Kconfig
>@@ -698,6 +698,17 @@ config NET_ACT_VLAN
> To compile this code as a module, choose M here: the
> module will be called act_vlan.
>
>+config NET_ACT_CONNMARK
>+ tristate "Netfilter Connection Mark Retriever"
>+ depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES
>+ ---help---
>+ Say Y here to allow retrieving of conn mark
>+
>+ If unsure, say N.
>+
>+ To compile this code as a module, choose M here: the
>+ module will be called act_connmark.
>+
> config NET_CLS_IND
> bool "Incoming device classification"
> depends on NET_CLS_U32 || NET_CLS_FW
>diff --git a/net/sched/Makefile b/net/sched/Makefile
>index 679f24a..47304cd 100644
>--- a/net/sched/Makefile
>+++ b/net/sched/Makefile
>@@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
> obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
> obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o
> obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o
>+obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o
> obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
> obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
> obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
>diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
>new file mode 100644
>index 0000000..f936434
>--- /dev/null
>+++ b/net/sched/act_connmark.c
>@@ -0,0 +1,211 @@
>+/*
>+ * net/sched/act_connmark.c netfilter connmark retriever action
>+ * skb mark is over-written
>+ *
>+ * Copyright (c) 2011 Felix Fietkau <nbd@openwrt.org>
>+ *
>+ * This program is free software; you can redistribute it and/or modify it
>+ * under the terms and conditions of the GNU General Public License,
>+ * version 2, as published by the Free Software Foundation.
>+ *
>+ * This program is distributed in the hope it will be useful, but WITHOUT
>+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
>+ * more details.
>+ *
>+ * You should have received a copy of the GNU General Public License along with
>+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
>+ * Place - Suite 330, Boston, MA 02111-1307 USA.
>+ *
>+ *
>+*/
>+
>+#include <linux/module.h>
>+#include <linux/init.h>
>+#include <linux/kernel.h>
>+#include <linux/skbuff.h>
>+#include <linux/rtnetlink.h>
>+#include <linux/pkt_cls.h>
>+#include <linux/ip.h>
>+#include <linux/ipv6.h>
>+#include <net/netlink.h>
>+#include <net/pkt_sched.h>
>+#include <net/act_api.h>
>+#include <uapi/linux/tc_act/tc_connmark.h>
>+#include <net/tc_act/tc_connmark.h>
>+
>+#include <net/netfilter/nf_conntrack.h>
>+#include <net/netfilter/nf_conntrack_core.h>
>+#include <net/netfilter/nf_conntrack_zones.h>
>+
>+#define CONNMARK_TAB_MASK 3
>+
>+static struct tcf_hashinfo connmark_hash_info;
>+
>+static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a,
>+ struct tcf_result *res)
>+{
>+ const struct nf_conntrack_tuple_hash *thash;
>+ struct nf_conntrack_tuple tuple;
>+ enum ip_conntrack_info ctinfo;
>+ struct tcf_connmark_info *ca = a->priv;
>+ struct nf_conn *c;
>+ int proto;
>+
>+ spin_lock(&ca->tcf_lock);
>+ ca->tcf_tm.lastuse = jiffies;
>+ bstats_update(&ca->tcf_bstats, skb);
>+
>+ if (skb->protocol == htons(ETH_P_IP)) {
>+ if (skb->len < sizeof(struct iphdr)) {
>+ goto out;
>+ }
{} should be avoided here.
>+ proto = NFPROTO_IPV4;
>+ } else if (skb->protocol == htons(ETH_P_IPV6)) {
>+ if (skb->len < sizeof(struct ipv6hdr)) {
>+ goto out;
>+ }
^^ same here.
Otherwise this looks fine to me. Feel free to add my ack to v2
>+ proto = NFPROTO_IPV6;
>+ } else {
>+ goto out;
>+ }
>+
>+ c = nf_ct_get(skb, &ctinfo);
>+ if (c != NULL) {
>+ skb->mark = c->mark;
>+ /* using overlimits stats to count how many packets marked */
>+ ca->tcf_qstats.overlimits++;
>+ nf_ct_put(c);
>+ goto out;
>+ }
>+
>+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
>+ proto, &tuple))
>+ goto out;
>+
>+ thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple);
>+ if (!thash)
>+ goto out;
>+
>+ c = nf_ct_tuplehash_to_ctrack(thash);
>+ /* using overlimits stats to count how many packets marked */
>+ ca->tcf_qstats.overlimits++;
>+ skb->mark = c->mark;
>+ nf_ct_put(c);
>+
>+out:
>+ skb->nfct = NULL;
>+ spin_unlock(&ca->tcf_lock);
>+ return ca->tcf_action;
>+}
>+
>+static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
>+ [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) },
>+};
>+
>+static int tcf_connmark_init(struct net *net, struct nlattr *nla,
>+ struct nlattr *est, struct tc_action *a,
>+ int ovr, int bind)
>+{
>+ struct nlattr *tb[TCA_CONNMARK_MAX + 1];
>+ struct tcf_connmark_info *ci;
>+ struct tc_connmark *parm;
>+ int ret = 0;
>+
>+ if (nla == NULL)
>+ return -EINVAL;
>+
>+ ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy);
>+ if (ret < 0)
>+ return ret;
>+
>+ parm = nla_data(tb[TCA_CONNMARK_PARMS]);
>+
>+ if (!tcf_hash_check(parm->index, a, bind)) {
>+ ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind);
>+ if (ret)
>+ return ret;
>+
>+ ci = to_connmark(a);
>+ ci->tcf_action = parm->action;
>+ ci->zone = parm->zone;
>+
>+ tcf_hash_insert(a);
>+ ret = ACT_P_CREATED;
>+ } else {
>+ ci = to_connmark(a);
>+ if (bind)
>+ return 0;
>+ tcf_hash_release(a, bind);
>+ if (!ovr)
>+ return -EEXIST;
>+ /* replacing action and zone */
>+ ci->tcf_action = parm->action;
>+ ci->zone = parm->zone;
>+ }
>+
>+ return ret;
>+}
>+
>+static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
>+ int bind, int ref)
>+{
>+ unsigned char *b = skb_tail_pointer(skb);
>+ struct tcf_connmark_info *ci = a->priv;
>+
>+ struct tc_connmark opt = {
>+ .index = ci->tcf_index,
>+ .refcnt = ci->tcf_refcnt - ref,
>+ .bindcnt = ci->tcf_bindcnt - bind,
>+ .action = ci->tcf_action,
>+ .zone = ci->zone,
>+ };
>+ struct tcf_t t;
>+
>+ if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
>+ goto nla_put_failure;
>+
>+ t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install);
>+ t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse);
>+ t.expires = jiffies_to_clock_t(ci->tcf_tm.expires);
>+ if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t))
>+ goto nla_put_failure;
>+
>+ return skb->len;
>+nla_put_failure:
>+ nlmsg_trim(skb, b);
>+ return -1;
>+}
>+
>+static struct tc_action_ops act_connmark_ops = {
>+ .kind = "connmark",
>+ .hinfo = &connmark_hash_info,
>+ .type = TCA_ACT_CONNMARK,
>+ .owner = THIS_MODULE,
>+ .act = tcf_connmark,
>+ .dump = tcf_connmark_dump,
>+ .init = tcf_connmark_init,
>+};
>+
>+MODULE_AUTHOR("Felix Fietkau <nbd@openwrt.org>");
>+MODULE_DESCRIPTION("Connection tracking mark restoring");
>+MODULE_LICENSE("GPL");
>+
>+static int __init connmark_init_module(void)
>+{
>+ int ret;
>+
>+ ret = tcf_hashinfo_init(&connmark_hash_info, CONNMARK_TAB_MASK);
>+ if (ret)
>+ return ret;
>+
>+ return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK);
>+}
>+
>+static void __exit connmark_cleanup_module(void)
>+{
>+ tcf_unregister_action(&act_connmark_ops);
>+}
>+
>+module_init(connmark_init_module);
>+module_exit(connmark_cleanup_module);
>--
>1.7.9.5
>
>--
>To unsubscribe from this list: send the line "unsubscribe netdev" in
>the body of a message to majordomo@vger.kernel.org
>More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Mr. Morgan Gordon
From: Mr. Morgan Gordon @ 2015-01-12 10:39 UTC (permalink / raw)
I have a business transaction of USD$27Million for you. Contact me (morgangordon01@gmail.com) +27-717-397-609 Mr. Morgan Gordon
^ permalink raw reply
* Re: [patch net-next] tc: add BPF based action
From: Jiri Pirko @ 2015-01-12 10:52 UTC (permalink / raw)
To: Alexei Starovoitov
Cc: Daniel Borkmann, Network Development, David S. Miller, jhs,
Stephen Hemminger
In-Reply-To: <CAMEtUuzjdd55JrK2oJFRHFVAmogwM-tK8dKxidoRpG=SrFs4bA@mail.gmail.com>
Thu, Jan 08, 2015 at 08:04:31PM CET, ast@plumgrid.com wrote:
>On Wed, Jan 7, 2015 at 11:26 PM, Jiri Pirko <jiri@resnulli.us> wrote:
>>>
>>>On the other hand, I would understand if it's at some point in
>>>time eBPF which would f.e. mangle the packet, but the API you
>>>propose is clearly classic BPF. ;)
>>
>> Exactly. I would like to extend cls_bpf and act_bpf to handle eBPF right
>> after. That is the point.
>
>I say that connecting it with classic BPF is not a prerequisite
>to use eBPF in there. Invocation place may be the same,
>but the way to pass the program will be different.
>For classic with just pass the whole program, whereas
>for eBPF we'll be likely passing fd.
>Theoretically we can pass eBPF as vanilla program
>as well that doesn't have map access, but verifier check
>will only be binary (accept or reject). Which is not user
>friendly. Even rejection of classic BPF is hard to decipher.
>Especially when only language for classic is assembler
>and poor users have no easy way to know what was
>wrong with the program. Therefore I like bpf syscall
>as a main and only interface to load the programs
>and pass prog_fd to places where they suppose to run.
>Having syscall as center place to load programs
>also helps with accounting, since root will be able
>to do something like 'lsmod' to see all loaded programs.
>Anyway, that's a conversion for later...
>
>As Daniel pointed out I think some better articulation
>on what classic bpf programs will do here is needed.
>It seems they will work as pre-filter on an action?
>Few examples would help to understand use cases...
Well, one can define bpf action to do final policy in tc pipeline if
skb should be dropped or not. I'm aware that this is in theory doable by
cls_bpf, but oftentimes one likes to use different cls.
And also, the plan is to extend this for ebpf in near future, as well
as cls_bpf. That will provide many more possibilities for user.
The intention here is to keep cls_bpf and act_bpf feature-consistent.
Thanks.
Jiri
^ permalink raw reply
* RE: Clarification regarding IFLA_BRPORT_LEARNING_SYNC and aging of fdb entries learnt via br_fdb_external_learn_add()
From: Arad, Ronen @ 2015-01-12 11:00 UTC (permalink / raw)
To: Scott Feldman, Netdev; +Cc: Jiri Pirko, Siva Mannem, marichika4@gmail.com
In-Reply-To: <CAE4R7bCLiD7qQX-YFp6be72_jWJM_T6Yf=-MQL_2udph46QD5w@mail.gmail.com>
>-----Original Message-----
>From: Scott Feldman [mailto:sfeldma@gmail.com]
>Sent: Saturday, January 10, 2015 10:29 PM
>To: Arad, Ronen
>Cc: Netdev; Jiri Pirko; Siva Mannem; marichika4@gmail.com
>Subject: Re: Clarification regarding IFLA_BRPORT_LEARNING_SYNC and aging of
>fdb entries learnt via br_fdb_external_learn_add()
>
>Perfect, I think we have a good working set of use-cases. Thanks for
>adding those. Your case 3 seems do-able without too much work since
>we already know which ones where externally added, just need another
>per-bridge-port flag to turn off bridging aging of externally learned
>entries. This will address the performance issue you (and B
>Viswanath) raised.
> What about the entry stats, from the user's
>'bridge -s fdb show" perspective for the bridge fdb entries? Will
>these numbers match expectations? I think case 1 and case 4 provide a
>coherent stats view. Case 3 seems to be lacking in this regard.
>
I think that statistics accuracy is orthogonal to the mechanism used for aging
in all the cases where learning sync is enabled (i.e. cases 1, 3, 4).
Accuracy only depends on how frequent the switch port driver notifies the
bridge FDB. The best accuracy achievable is with updates once per-second. At
the other extreme the switch port driver does not refresh entries. It only
notifies the bridge after entries are removed from the HW. In this extreme case
the statistics will really show the time since an entry was first learned and
not the time since it was last re-learned in HW.
Switch port driver could pick some
acceptable rate to update the bridge module. Within a typical 5 minutes aging
interval, updates every 10 seconds or every 30 seconds could be a reasonable
tradeoff between statistics accuracy and system overhead.
>On Fri, Jan 9, 2015 at 10:51 PM, Arad, Ronen <ronen.arad@intel.com> wrote:
>> [...]
>>>> It is indeed simpler. However, if the overhead of reading hit bits from
>the
>>>HW
>>>> and updating freshness of entries using br_fdb_external_learn_add() is too
>>>> expensive, it would force such platforms to disable learning sync
>>>altogether.
>>>> Therefore, I believe aging offload flag (could be sufficient at bridge
>>>level)
>>>> and external aging interval (possibly longer than the software aging
>>>interval)
>>>> will encourage drivers to use leaning sync.
>>>>>-scott
>>>
>>>I'm not sure I follow that last part.
>>>
>>>Can we list out the use-cases to see what's missing?
>>>
>>>Case 1: bridge ages out learned_sync'ed entries
>>>
>>>bridge port learning: off
>>>offload port learning: on
>>>offload port learning_sync: on
>>>
>>>Driver calls br_fdb_external_learn_add() periodically to refresh
>>>bridge fdb entry
>>>to keep it from going stale.
>>>Bridge removes aged out fdb entries (and indirectly tells offload
>>>device to do the same).
>>>
>>>Case 2: offload device/bridge age out entries independently
>>>
>>>bridge port learning: on
>>>offload port learning: on
>>>offload port learning_sync: off
>>>
>>>Bridge ages out its stale learned entries, independent of offload device.
>>>Offload device ages out its stale learned entries, independent of bridge.
>>>
>>>Case 3: ?
>>>
>>>Please help me finish the use-case list so we can see what's missing.
>>
>>
>> Case 3: offload device ages out external entries and notifies bridge
>>
>> bridge port learning: on or off (Bridge only learns from packets seen
>(Rx/Tx))
>> offload port learning: on
>> offload port learning_sync: on
>> bridge aging of external learn: off
>> offload device aging: on
>>
>> Switch port/device driver ages entries (could be by HW aging or soft aging
>in
>> driver/firmware),
>> notifies bridge about aged entries using br_fdb_externallearn_del().
>> This allows efficient HW aging and batched notification at a pace
>independent
>> of bridge aging interval.
>> User still enjoys a single VLAN-aware FDB within the bridge module and
>having
>> all entries in one place. Externally learned entries are identified as such
>by
>> iproute2 "bridge fdb show" command. Device does not have to implement
>> ndo_bridge_fdb_dump() for each offload port as the bridge module provides it
>> for the common FDB.
>>
>> Case 4: bridge ages owned and external entries at different intervals
>>
>> bridge port learning: on (Effectively only for Rx/Tx traffic seen by
>> software bridge)
>> offload port learning: on (transient traffic and RxTx, overlap with bridge
>> learned entries possible)
>> offload port learning_sync: on
>> bridge aging of external learn: on
>> offload device aging: off
>> bridge aging interval for owned entries: T1
>> bridge aging interval for external entries: T2 (Typically T2 > T1)
>>
>> This allows for fine-tuning the overhead of periodic updates of entries
>> freshness from offload port device.
>>
>> The bottom line of cases 3-4 is that it is desirable to use the common
>bridge
>> FDB as long as bridge aging of externally learned entries could be
>controlled
>> by the offload device: Either be at a longer interval or disabled.
>>
>>>
>>>-scott
>>>--
>>>To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>the body of a message to majordomo@vger.kernel.org
>>>More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] bonding: cleanup bond_opts array
From: Nikolay Aleksandrov @ 2015-01-12 11:05 UTC (permalink / raw)
To: Andy Gospodarek, Jonathan Toppins; +Cc: netdev, shm
In-Reply-To: <20150109184821.GC56806@gospo.home.greyhouse.net>
On 09/01/15 19:48, Andy Gospodarek wrote:
> On Fri, Jan 09, 2015 at 01:31:08PM -0500, Jonathan Toppins wrote:
>> Remove the empty array element initializer and size the array with
>> BOND_OPT_LAST so the compiler will complain if more elements are in
>> there than should be.
>>
>> An interesting unwanted side effect of this initializer is that if one
>> inserts new options into the middle of the array then this initializer
>> will zero out the option that equals BOND_OPT_TLB_DYNAMIC_LB+1.
>>
>> Example:
>> Extend the OPTS enum:
>> enum {
>> ...
>> BOND_OPT_TLB_DYNAMIC_LB,
>> BOND_OPT_LACP_NEW1,
>> BOND_OPT_LAST
>> };
>>
>> Now insert into bond_opts array:
>> static const struct bond_option bond_opts[] = {
>> ...
>> [BOND_OPT_LACP_RATE] = { .... unchanged stuff .... },
>> [BOND_OPT_LACP_NEW1] = { ... new stuff ... },
>> ...
>> [BOND_OPT_TLB_DYNAMIC_LB] = { .... unchanged stuff ....},
>> { } // MARK A
>> };
>>
>> Since BOND_OPT_LACP_NEW1 = BOND_OPT_TLB_DYNAMIC_LB+1, the last
>> initializer (MARK A) will overwrite the contents of BOND_OPT_LACP_NEW1
>> and can be easily viewed with the crash utility.
>>
>> Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
>> Cc: Andy Gospodarek <gospo@cumulusnetworks.com>
>> Cc: Nikolay Aleksandrov <nikolay@redhat.com>
>
> I do not recall if there was a specific reason that Nik added this, so
> presuming there was not....
>
> Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
>
Indeed, it's an oversight on my part from a previous version of the opts
patch-set which used a different end-of-array indicator :-)
Acked-by: Nikolay Aleksandrov <nikolay@redhat.com>
^ permalink raw reply
* Re: [PATCH v4 01/04] can: kvaser_usb: Don't dereference skb after a netif_rx()
From: Marc Kleine-Budde @ 2015-01-12 11:05 UTC (permalink / raw)
To: Ahmed S. Darwish, Olivier Sobrie, Oliver Hartkopp,
Wolfgang Grandegger
Cc: David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20150111204952.GC8999@linux>
[-- Attachment #1: Type: text/plain, Size: 634 bytes --]
On 01/11/2015 09:49 PM, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>
> We should not touch the packet after a netif_rx: it might
> get freed behind our back.
>
> Suggested-by: Marc Kleine-Budde <mkl@pengutronix.de>
> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
Applied to can/master.
tnx,
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply
* Re: [PATCH net] ipv6: Prevent ipv6_find_hdr() from returning ENOENT for valid non-first fragments
From: Rahul Sharma @ 2015-01-12 11:08 UTC (permalink / raw)
To: Hannes Frederic Sowa
Cc: Pablo Neira Ayuso, netdev, linux-kernel, netfilter-devel
In-Reply-To: <1420818636.31075.15.camel@stressinduktion.org>
Hi Pablo, Hannes
On Fri, Jan 9, 2015 at 9:20 PM, Hannes Frederic Sowa
<hannes@stressinduktion.org> wrote:
> On Fr, 2015-01-09 at 12:45 +0100, Pablo Neira Ayuso wrote:
>> Hi Hannes,
>>
>> On Fri, Jan 09, 2015 at 12:34:15PM +0100, Hannes Frederic Sowa wrote:
>> > On Fri, Jan 9, 2015, at 08:18, Rahul Sharma wrote:
>> > > Hi Pablo,
>> > >
>> > > On Fri, Jan 9, 2015 at 5:35 AM, Pablo Neira Ayuso <pablo@netfilter.org>
>> > > wrote:
>> > > > On Thu, Jan 08, 2015 at 11:39:16PM +0100, Hannes Frederic Sowa wrote:
>> > > >> Hi Pablo,
>> > > >>
>> > > >> On Thu, Jan 8, 2015, at 21:53, Pablo Neira Ayuso wrote:
>> > > >> > I'm afraid we cannot just get rid of that !ipv6_ext_hdr() check. The
>> > > >> > ipv6_find_hdr() function is designed to return the transport protocol.
>> > > >> > After the proposed change, it will return extension header numbers.
>> > > >> > This will break existing ip6tables rulesets since the `-p' option
>> > > >> > relies on this function to match the transport protocol.
>> > > >> >
>> > > >> > Note that the AH header is skipped (see code a bit below this
>> > > >> > problematic fragmentation handling) so the follow up header after the
>> > > >> > AH header is returned as the transport header.
>> > > >> >
>> > > >> > We can probably return the AH protocol number for non-1st fragments.
>> > > >> > However, that would be something new to ip6tables since nobody has
>> > > >> > ever seen packet matching `-p ah' rules. Thus, we restore control to
>> > > >> > the user to allow this, but we would accept all kind of fragmented AH
>> > > >> > traffic through the firewall since we cannot know what transport
>> > > >> > protocol contains from non-1st fragments (unless I'm missing anything,
>> > > >> > I need to have a closer look at this again tomorrow with fresher
>> > > >> > mind).
>> > > >>
>> > > >> The code in question is guarded by (_frag_off != 0), so we are
>> > > >> definitely processing a non-1st fragment currently. The -p match would
>> > > >> happen at the time when the packet is reassembled and thus ipv6_find_hdr
>> > > >> will find the real transport (final) header at this point (I hope I
>> > > >> followed the code correctly here).
>> > > >
>> > > > Then, Rahul should get things working by modprobing nf_defrag_ipv6.
>> > >
>> > > I already had nf_defrag_ipv6 installed when the issue occured. But I
>> > > see ip6table_raw_hook returning NF_DROP for the second fragment.
>> >
>> > That's what I expected. I think the change only affects hooks before
>> > reassembly.
>>
>> reassembly happens at NF_IP6_PRI_CONNTRACK_DEFRAG (-400), so that
>> happens before NF_IP6_PRI_RAW (-300) in IPv6 which is where the raw
>> table is placed.
>
> I tried to reproduce it, but couldn't get non-1st fragments getting
> dropped during traversal of the raw table. They get dropped earlier at
> during reassembly or pass.
>
> I agree with Pablo, I also would like to see more data.
>
> Thanks,
> Hannes
>
>
I enabled pr_debug() and there was no error in nf_ct_frag6_gather().
It seems to have defragmented the packet correctly. As expected,
ipv6_defrag() returns NF_STOLEN for the first packet after queuing it.
For the next fragment, ipv6_defrag() calls nf_ct_frag6_output() after
after reassembling it. I checked skb->len inside nf_ct_frag6_output()
and it implies that they were indeed reassembled. Now from what I
follow from the code, this function seems to iterate over both the
fragments with corresponding skb's and incrementing
thresh(NF_IP6_PRI_CONNTRACK_DEFRAG) by 1 so the subsequent hooks
(ip6table_raw_hook, ipv6_conntrack_in, ip6table_mangle_hook) should
get called in order for both the fragments. This works fine for the
first fragment as I had mentioned, but the ip6table_raw_hook returns
NF_DROP for the second fragment. Please let me know if you need more
inputs.
Thanks,
Rahul
^ permalink raw reply
* Re: [PATCH] net/at91_ether: prepare and unprepare clock
From: Alexandre Belloni @ 2015-01-12 11:08 UTC (permalink / raw)
To: David S. Miller
Cc: Nicolas Ferre, Cyrille Pitchen, Boris Brezillon, linux-arm-kernel,
linux-kernel, netdev
In-Reply-To: <54AE623E.5020507@atmel.com>
Hi Dave,
This is actually a fix for 3.19, can you take it for the next rc ?
Thanks!
On 08/01/2015 at 11:55:58 +0100, Nicolas Ferre wrote :
> Le 07/01/2015 23:59, Alexandre Belloni a écrit :
> > The clock is enabled without being prepared, this leads to:
> >
> > WARNING: CPU: 0 PID: 1 at drivers/clk/clk.c:889 __clk_enable+0x24/0xa8()
> >
> > and a non working ethernet interface.
> >
> > Use clk_prepare_enable() and clk_disable_unprepare() to handle the clock.
> >
> > Signed-off-by: Alexandre Belloni <alexandre.belloni@free-electrons.com>
>
> Acked-by: Nicolas Ferre <nicolas.ferre@atmel.com>
>
> > ---
> > drivers/net/ethernet/cadence/at91_ether.c | 10 +++++-----
> > 1 file changed, 5 insertions(+), 5 deletions(-)
> >
> > diff --git a/drivers/net/ethernet/cadence/at91_ether.c b/drivers/net/ethernet/cadence/at91_ether.c
> > index 55eb7f2af2b4..7ef55f5fa664 100644
> > --- a/drivers/net/ethernet/cadence/at91_ether.c
> > +++ b/drivers/net/ethernet/cadence/at91_ether.c
> > @@ -340,7 +340,7 @@ static int __init at91ether_probe(struct platform_device *pdev)
> > res = PTR_ERR(lp->pclk);
> > goto err_free_dev;
> > }
> > - clk_enable(lp->pclk);
> > + clk_prepare_enable(lp->pclk);
> >
> > lp->hclk = ERR_PTR(-ENOENT);
> > lp->tx_clk = ERR_PTR(-ENOENT);
> > @@ -406,7 +406,7 @@ static int __init at91ether_probe(struct platform_device *pdev)
> > err_out_unregister_netdev:
> > unregister_netdev(dev);
> > err_disable_clock:
> > - clk_disable(lp->pclk);
> > + clk_disable_unprepare(lp->pclk);
> > err_free_dev:
> > free_netdev(dev);
> > return res;
> > @@ -424,7 +424,7 @@ static int at91ether_remove(struct platform_device *pdev)
> > kfree(lp->mii_bus->irq);
> > mdiobus_free(lp->mii_bus);
> > unregister_netdev(dev);
> > - clk_disable(lp->pclk);
> > + clk_disable_unprepare(lp->pclk);
> > free_netdev(dev);
> >
> > return 0;
> > @@ -440,7 +440,7 @@ static int at91ether_suspend(struct platform_device *pdev, pm_message_t mesg)
> > netif_stop_queue(net_dev);
> > netif_device_detach(net_dev);
> >
> > - clk_disable(lp->pclk);
> > + clk_disable_unprepare(lp->pclk);
> > }
> > return 0;
> > }
> > @@ -451,7 +451,7 @@ static int at91ether_resume(struct platform_device *pdev)
> > struct macb *lp = netdev_priv(net_dev);
> >
> > if (netif_running(net_dev)) {
> > - clk_enable(lp->pclk);
> > + clk_prepare_enable(lp->pclk);
> >
> > netif_device_attach(net_dev);
> > netif_start_queue(net_dev);
> >
>
>
> --
> Nicolas Ferre
--
Alexandre Belloni, Free Electrons
Embedded Linux, Kernel and Android engineering
http://free-electrons.com
^ permalink raw reply
* Re: [PATCH v4 2/4] can: kvaser_usb: Update error counters before exiting on OOM
From: Marc Kleine-Budde @ 2015-01-12 11:09 UTC (permalink / raw)
To: Ahmed S. Darwish, Olivier Sobrie, Oliver Hartkopp,
Wolfgang Grandegger
Cc: David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20150111201519.GC8855@linux>
[-- Attachment #1: Type: text/plain, Size: 572 bytes --]
On 01/11/2015 09:15 PM, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>
> Let the error counters be more accurate in case of Out of
> Memory conditions.
Please have a look at kvaser_usb_rx_error(), the whole state handling is
omitted in case of OOM.
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply
* Re: [Xen-devel] [PATCH net] drivers: net: xen-netfront: remove residual dead code
From: David Vrabel @ 2015-01-12 11:12 UTC (permalink / raw)
To: Vincenzo Maffione, netdev; +Cc: xen-devel, boris.ostrovsky, david.vrabel
In-Reply-To: <1420881625-4935-1-git-send-email-v.maffione@gmail.com>
On 10/01/15 09:20, Vincenzo Maffione wrote:
> This patch removes some unused arrays from the netfront private
> data structures. These arrays were used in "flip" receive mode.
Reviewed-by: David Vrabel <david.vrabel@citrix.com>
Thanks.
David
^ permalink raw reply
* Re: [PATCH v4 3/4] can: kvaser_usb: Add support for the Usbcan-II family
From: Ahmed S. Darwish @ 2015-01-12 11:20 UTC (permalink / raw)
To: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
Marc Kleine-Budde
Cc: David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20150111203612.GA8999@linux>
Hi Marc,
On Sun, Jan 11, 2015 at 03:36:12PM -0500, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>
> CAN to USB interfaces sold by the Swedish manufacturer Kvaser are
> divided into two major families: 'Leaf', and 'UsbcanII'. From an
> Operating System perspective, the firmware of both families behave
> in a not too drastically different fashion.
>
> This patch adds support for the UsbcanII family of devices to the
> current Kvaser Leaf-only driver.
>
> CAN frames sending, receiving, and error handling paths has been
> tested using the dual-channel "Kvaser USBcan II HS/LS" dongle. It
> should also work nicely with other products in the same category.
>
Please delay applying this to as I've just discovered that
removal of two of the device family checks introduced two
un-necessary GCC warnings.
Will send and updated version soon.
Thanks,
Darwish
^ permalink raw reply
* Re: [PATCH v4 3/4] can: kvaser_usb: Add support for the Usbcan-II family
From: Marc Kleine-Budde @ 2015-01-12 11:43 UTC (permalink / raw)
To: Ahmed S. Darwish, Olivier Sobrie, Oliver Hartkopp,
Wolfgang Grandegger
Cc: David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20150111203612.GA8999@linux>
[-- Attachment #1: Type: text/plain, Size: 35082 bytes --]
On 01/11/2015 09:36 PM, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
>
> CAN to USB interfaces sold by the Swedish manufacturer Kvaser are
> divided into two major families: 'Leaf', and 'UsbcanII'. From an
> Operating System perspective, the firmware of both families behave
> in a not too drastically different fashion.
>
> This patch adds support for the UsbcanII family of devices to the
> current Kvaser Leaf-only driver.
>
> CAN frames sending, receiving, and error handling paths has been
> tested using the dual-channel "Kvaser USBcan II HS/LS" dongle. It
> should also work nicely with other products in the same category.
>
> List of new devices supported by this driver update:
>
> - Kvaser USBcan II HS/HS
> - Kvaser USBcan II HS/LS
> - Kvaser USBcan Rugged ("USBcan Rev B")
> - Kvaser Memorator HS/HS
> - Kvaser Memorator HS/LS
> - Scania VCI2 (if you have the Kvaser logo on top)
>
> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
See some minor comments inline.
Marc
> ---
> drivers/net/can/usb/Kconfig | 8 +-
> drivers/net/can/usb/kvaser_usb.c | 612 ++++++++++++++++++++++++++++++---------
> 2 files changed, 487 insertions(+), 133 deletions(-)
>
> ** V4 Changelog:
> - Use type-safe C methods instead of cpp macros
> - Further clarify the code and comments on error events channel arbitration
> - Remove defensive checks against non-existing families
> - Re-order methods to remove forward declarations
> - Smaller stuff spotted by earlier review (function prefexes, etc.)
>
> ** V3 Changelog:
> - Fix padding for the usbcan_msg_tx_acknowledge command
> - Remove kvaser_usb->max_channels and the MAX_NET_DEVICES macro
> - Rename commands to CMD_LEAF_xxx and CMD_USBCAN_xxx
> - Apply checkpatch.pl suggestions ('net/' comments, multi-line strings, etc.)
>
> ** V2 Changelog:
> - Update Kconfig entries
> - Use actual number of CAN channels (instead of max) where appropriate
> - Rebase over a new set of UsbcanII-independent driver fixes
>
> diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
> index a77db919..f6f5500 100644
> --- a/drivers/net/can/usb/Kconfig
> +++ b/drivers/net/can/usb/Kconfig
> @@ -25,7 +25,7 @@ config CAN_KVASER_USB
> tristate "Kvaser CAN/USB interface"
> ---help---
> This driver adds support for Kvaser CAN/USB devices like Kvaser
> - Leaf Light.
> + Leaf Light and Kvaser USBcan II.
>
> The driver provides support for the following devices:
> - Kvaser Leaf Light
> @@ -46,6 +46,12 @@ config CAN_KVASER_USB
> - Kvaser USBcan R
> - Kvaser Leaf Light v2
> - Kvaser Mini PCI Express HS
> + - Kvaser USBcan II HS/HS
> + - Kvaser USBcan II HS/LS
> + - Kvaser USBcan Rugged ("USBcan Rev B")
> + - Kvaser Memorator HS/HS
> + - Kvaser Memorator HS/LS
> + - Scania VCI2 (if you have the Kvaser logo on top)
>
> If unsure, say N.
>
> diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
> index 0eb870b..da47d17 100644
> --- a/drivers/net/can/usb/kvaser_usb.c
> +++ b/drivers/net/can/usb/kvaser_usb.c
> @@ -6,10 +6,12 @@
> * Parts of this driver are based on the following:
> * - Kvaser linux leaf driver (version 4.78)
> * - CAN driver for esd CAN-USB/2
> + * - Kvaser linux usbcanII driver (version 5.3)
> *
> * Copyright (C) 2002-2006 KVASER AB, Sweden. All rights reserved.
> * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
> * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
> + * Copyright (C) 2015 Valeo Corporation
> */
>
> #include <linux/completion.h>
> @@ -21,6 +23,15 @@
> #include <linux/can/dev.h>
> #include <linux/can/error.h>
>
> +/* Kvaser USB CAN dongles are divided into two major families:
> + * - Leaf: Based on Renesas M32C, running firmware labeled as 'filo'
> + * - UsbcanII: Based on Renesas M16C, running firmware labeled as 'helios'
> + */
> +enum kvaser_usb_family {
> + KVASER_LEAF,
> + KVASER_USBCAN,
> +};
Nitpick: please move after the #defines
> +
> #define MAX_TX_URBS 16
> #define MAX_RX_URBS 4
> #define START_TIMEOUT 1000 /* msecs */
> @@ -30,8 +41,9 @@
> #define RX_BUFFER_SIZE 3072
> #define CAN_USB_CLOCK 8000000
> #define MAX_NET_DEVICES 3
> +#define MAX_USBCAN_NET_DEVICES 2
>
> -/* Kvaser USB devices */
> +/* Leaf USB devices */
> #define KVASER_VENDOR_ID 0x0bfd
> #define USB_LEAF_DEVEL_PRODUCT_ID 10
> #define USB_LEAF_LITE_PRODUCT_ID 11
> @@ -56,6 +68,24 @@
> #define USB_LEAF_LITE_V2_PRODUCT_ID 288
> #define USB_MINI_PCIE_HS_PRODUCT_ID 289
>
> +static inline bool kvaser_is_leaf(const struct usb_device_id *id)
> +{
> + return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
> + id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID;
> +}
> +
> +/* USBCANII devices */
> +#define USB_USBCAN_REVB_PRODUCT_ID 2
> +#define USB_VCI2_PRODUCT_ID 3
> +#define USB_USBCAN2_PRODUCT_ID 4
> +#define USB_MEMORATOR_PRODUCT_ID 5
> +
> +static inline bool kvaser_is_usbcan(const struct usb_device_id *id)
> +{
> + return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID &&
> + id->idProduct <= USB_MEMORATOR_PRODUCT_ID;
> +}
> +
> /* USB devices features */
> #define KVASER_HAS_SILENT_MODE BIT(0)
> #define KVASER_HAS_TXRX_ERRORS BIT(1)
> @@ -73,7 +103,7 @@
> #define MSG_FLAG_TX_ACK BIT(6)
> #define MSG_FLAG_TX_REQUEST BIT(7)
>
> -/* Can states */
> +/* Can states (M16C CxSTRH register) */
> #define M16C_STATE_BUS_RESET BIT(0)
> #define M16C_STATE_BUS_ERROR BIT(4)
> #define M16C_STATE_BUS_PASSIVE BIT(5)
> @@ -98,7 +128,13 @@
> #define CMD_START_CHIP_REPLY 27
> #define CMD_STOP_CHIP 28
> #define CMD_STOP_CHIP_REPLY 29
> -#define CMD_GET_CARD_INFO2 32
> +#define CMD_READ_CLOCK 30
> +#define CMD_READ_CLOCK_REPLY 31
> +
> +#define CMD_LEAF_GET_CARD_INFO2 32
> +#define CMD_USBCAN_RESET_CLOCK 32
> +#define CMD_USBCAN_CLOCK_OVERFLOW_EVENT 33
> +
> #define CMD_GET_CARD_INFO 34
> #define CMD_GET_CARD_INFO_REPLY 35
> #define CMD_GET_SOFTWARE_INFO 38
> @@ -108,8 +144,9 @@
> #define CMD_RESET_ERROR_COUNTER 49
> #define CMD_TX_ACKNOWLEDGE 50
> #define CMD_CAN_ERROR_EVENT 51
> -#define CMD_USB_THROTTLE 77
> -#define CMD_LOG_MESSAGE 106
> +
> +#define CMD_LEAF_USB_THROTTLE 77
> +#define CMD_LEAF_LOG_MESSAGE 106
>
> /* error factors */
> #define M16C_EF_ACKE BIT(0)
> @@ -121,6 +158,14 @@
> #define M16C_EF_RCVE BIT(6)
> #define M16C_EF_TRE BIT(7)
>
> +/* Only Leaf-based devices can report M16C error factors,
> + * thus define our own error status flags for USBCANII
> + */
> +#define USBCAN_ERROR_STATE_NONE 0
> +#define USBCAN_ERROR_STATE_TX_ERROR BIT(0)
> +#define USBCAN_ERROR_STATE_RX_ERROR BIT(1)
> +#define USBCAN_ERROR_STATE_BUSERROR BIT(2)
> +
> /* bittiming parameters */
> #define KVASER_USB_TSEG1_MIN 1
> #define KVASER_USB_TSEG1_MAX 16
> @@ -137,7 +182,7 @@
> #define KVASER_CTRL_MODE_SELFRECEPTION 3
> #define KVASER_CTRL_MODE_OFF 4
>
> -/* log message */
> +/* Extended CAN identifier flag */
> #define KVASER_EXTENDED_FRAME BIT(31)
>
> struct kvaser_msg_simple {
> @@ -148,30 +193,55 @@ struct kvaser_msg_simple {
> struct kvaser_msg_cardinfo {
> u8 tid;
> u8 nchannels;
> - __le32 serial_number;
> - __le32 padding;
> + union {
> + struct {
> + __le32 serial_number;
> + __le32 padding;
> + } __packed leaf0;
> + struct {
> + __le32 serial_number_low;
> + __le32 serial_number_high;
> + } __packed usbcan0;
> + } __packed;
> __le32 clock_resolution;
> __le32 mfgdate;
> u8 ean[8];
> u8 hw_revision;
> - u8 usb_hs_mode;
> - __le16 padding2;
> + union {
> + struct {
> + u8 usb_hs_mode;
> + } __packed leaf1;
> + struct {
> + u8 padding;
> + } __packed usbcan1;
> + } __packed;
> + __le16 padding;
> } __packed;
>
> struct kvaser_msg_cardinfo2 {
> u8 tid;
> - u8 channel;
> + u8 reserved;
> u8 pcb_id[24];
> __le32 oem_unlock_code;
> } __packed;
>
> -struct kvaser_msg_softinfo {
> +struct leaf_msg_softinfo {
> u8 tid;
> - u8 channel;
> + u8 padding0;
> __le32 sw_options;
> __le32 fw_version;
> __le16 max_outstanding_tx;
> - __le16 padding[9];
> + __le16 padding1[9];
> +} __packed;
> +
> +struct usbcan_msg_softinfo {
> + u8 tid;
> + u8 fw_name[5];
> + __le16 max_outstanding_tx;
> + u8 padding[6];
> + __le32 fw_version;
> + __le16 checksum;
> + __le16 sw_options;
> } __packed;
>
> struct kvaser_msg_busparams {
> @@ -188,36 +258,86 @@ struct kvaser_msg_tx_can {
> u8 channel;
> u8 tid;
> u8 msg[14];
> - u8 padding;
> - u8 flags;
> + union {
> + struct {
> + u8 padding;
> + u8 flags;
> + } __packed leaf;
> + struct {
> + u8 flags;
> + u8 padding;
> + } __packed usbcan;
> + } __packed;
> +} __packed;
> +
> +struct kvaser_msg_rx_can_header {
> + u8 channel;
> + u8 flag;
> } __packed;
>
> -struct kvaser_msg_rx_can {
> +struct leaf_msg_rx_can {
> u8 channel;
> u8 flag;
> +
> __le16 time[3];
> u8 msg[14];
> } __packed;
>
> -struct kvaser_msg_chip_state_event {
> +struct usbcan_msg_rx_can {
> + u8 channel;
> + u8 flag;
> +
> + u8 msg[14];
> + __le16 time;
> +} __packed;
> +
> +struct leaf_msg_chip_state_event {
> u8 tid;
> u8 channel;
> +
> __le16 time[3];
> u8 tx_errors_count;
> u8 rx_errors_count;
> +
> u8 status;
> u8 padding[3];
> } __packed;
>
> -struct kvaser_msg_tx_acknowledge {
> +struct usbcan_msg_chip_state_event {
> + u8 tid;
> + u8 channel;
> +
> + u8 tx_errors_count;
> + u8 rx_errors_count;
> + __le16 time;
> +
> + u8 status;
> + u8 padding[3];
> +} __packed;
> +
> +struct kvaser_msg_tx_acknowledge_header {
> + u8 channel;
> + u8 tid;
> +};
> +
> +struct leaf_msg_tx_acknowledge {
> u8 channel;
> u8 tid;
> +
> __le16 time[3];
> u8 flags;
> u8 time_offset;
> } __packed;
>
> -struct kvaser_msg_error_event {
> +struct usbcan_msg_tx_acknowledge {
> + u8 channel;
> + u8 tid;
> +
> + __le16 time;
> + __le16 padding;
> +} __packed;
> +
> +struct leaf_msg_error_event {
> u8 tid;
> u8 flags;
> __le16 time[3];
> @@ -229,6 +349,18 @@ struct kvaser_msg_error_event {
> u8 error_factor;
> } __packed;
>
> +struct usbcan_msg_error_event {
> + u8 tid;
> + u8 padding;
> + u8 tx_errors_count_ch0;
> + u8 rx_errors_count_ch0;
> + u8 tx_errors_count_ch1;
> + u8 rx_errors_count_ch1;
> + u8 status_ch0;
> + u8 status_ch1;
> + __le16 time;
> +} __packed;
> +
> struct kvaser_msg_ctrl_mode {
> u8 tid;
> u8 channel;
> @@ -243,7 +375,7 @@ struct kvaser_msg_flush_queue {
> u8 padding[3];
> } __packed;
>
> -struct kvaser_msg_log_message {
> +struct leaf_msg_log_message {
> u8 channel;
> u8 flags;
> __le16 time[3];
> @@ -260,19 +392,57 @@ struct kvaser_msg {
> struct kvaser_msg_simple simple;
> struct kvaser_msg_cardinfo cardinfo;
> struct kvaser_msg_cardinfo2 cardinfo2;
> - struct kvaser_msg_softinfo softinfo;
> struct kvaser_msg_busparams busparams;
> +
> + struct kvaser_msg_rx_can_header rx_can_header;
> + struct kvaser_msg_tx_acknowledge_header tx_acknowledge_header;
> +
> + union {
> + struct leaf_msg_softinfo softinfo;
> + struct leaf_msg_rx_can rx_can;
> + struct leaf_msg_chip_state_event chip_state_event;
> + struct leaf_msg_tx_acknowledge tx_acknowledge;
> + struct leaf_msg_error_event error_event;
> + struct leaf_msg_log_message log_message;
> + } __packed leaf;
> +
> + union {
> + struct usbcan_msg_softinfo softinfo;
> + struct usbcan_msg_rx_can rx_can;
> + struct usbcan_msg_chip_state_event chip_state_event;
> + struct usbcan_msg_tx_acknowledge tx_acknowledge;
> + struct usbcan_msg_error_event error_event;
> + } __packed usbcan;
> +
> struct kvaser_msg_tx_can tx_can;
> - struct kvaser_msg_rx_can rx_can;
> - struct kvaser_msg_chip_state_event chip_state_event;
> - struct kvaser_msg_tx_acknowledge tx_acknowledge;
> - struct kvaser_msg_error_event error_event;
> struct kvaser_msg_ctrl_mode ctrl_mode;
> struct kvaser_msg_flush_queue flush_queue;
> - struct kvaser_msg_log_message log_message;
> } u;
> } __packed;
>
> +/* Summary of a kvaser error event, for a unified Leaf/Usbcan error
> + * handling. Some discrepancies between the two families exist:
> + *
> + * - USBCAN firmware does not report M16C "error factors"
> + * - USBCAN controllers has difficulties reporting if the raised error
> + * event is for ch0 or ch1. They leave such arbitration to the OS
> + * driver by letting it compare error counters with previous values
> + * and decide the error event's channel. Thus for USBCAN, the channel
> + * field is only advisory.
> + */
> +struct kvaser_error_summary {
> + u8 channel, status, txerr, rxerr;
> + union {
> + struct {
> + u8 error_factor;
> + } leaf;
> + struct {
> + u8 other_ch_status;
> + u8 error_state;
> + } usbcan;
> + };
> +};
> +
> struct kvaser_usb_tx_urb_context {
> struct kvaser_usb_net_priv *priv;
> u32 echo_index;
> @@ -288,6 +458,7 @@ struct kvaser_usb {
>
> u32 fw_version;
> unsigned int nchannels;
> + enum kvaser_usb_family family;
>
> bool rxinitdone;
> void *rxbuf[MAX_RX_URBS];
> @@ -311,6 +482,7 @@ struct kvaser_usb_net_priv {
> };
>
> static const struct usb_device_id kvaser_usb_table[] = {
> + /* Leaf family IDs */
> { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
> { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
> { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
> @@ -360,6 +532,17 @@ static const struct usb_device_id kvaser_usb_table[] = {
> .driver_info = KVASER_HAS_TXRX_ERRORS },
> { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
> { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
> +
> + /* USBCANII family IDs */
> + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
> + .driver_info = KVASER_HAS_TXRX_ERRORS },
> + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
> + .driver_info = KVASER_HAS_TXRX_ERRORS },
> + { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
> + .driver_info = KVASER_HAS_TXRX_ERRORS },
> + { USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
> + .driver_info = KVASER_HAS_TXRX_ERRORS },
> +
> { }
> };
> MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
> @@ -463,7 +646,14 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
> if (err)
> return err;
>
> - dev->fw_version = le32_to_cpu(msg.u.softinfo.fw_version);
> + switch (dev->family) {
> + case KVASER_LEAF:
> + dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
> + break;
> + case KVASER_USBCAN:
> + dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
> + break;
> + }
>
> return 0;
> }
> @@ -482,7 +672,9 @@ static int kvaser_usb_get_card_info(struct kvaser_usb *dev)
> return err;
>
> dev->nchannels = msg.u.cardinfo.nchannels;
> - if (dev->nchannels > MAX_NET_DEVICES)
> + if ((dev->nchannels > MAX_NET_DEVICES) ||
> + (dev->family == KVASER_USBCAN &&
> + dev->nchannels > MAX_USBCAN_NET_DEVICES))
> return -EINVAL;
>
> return 0;
> @@ -496,8 +688,10 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
> struct kvaser_usb_net_priv *priv;
> struct sk_buff *skb;
> struct can_frame *cf;
> - u8 channel = msg->u.tx_acknowledge.channel;
> - u8 tid = msg->u.tx_acknowledge.tid;
> + u8 channel, tid;
> +
> + channel = msg->u.tx_acknowledge_header.channel;
> + tid = msg->u.tx_acknowledge_header.tid;
>
> if (channel >= dev->nchannels) {
> dev_err(dev->udev->dev.parent,
> @@ -616,53 +810,24 @@ static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
> }
>
> static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> - const struct kvaser_msg *msg)
> + struct kvaser_error_summary *es)
Can you make "struct kvaser_error_summary *es" const?
> {
> struct can_frame *cf;
> struct sk_buff *skb;
> struct net_device_stats *stats;
> struct kvaser_usb_net_priv *priv;
> unsigned int new_state;
> - u8 channel, status, txerr, rxerr, error_factor;
> -
> - switch (msg->id) {
> - case CMD_CAN_ERROR_EVENT:
> - channel = msg->u.error_event.channel;
> - status = msg->u.error_event.status;
> - txerr = msg->u.error_event.tx_errors_count;
> - rxerr = msg->u.error_event.rx_errors_count;
> - error_factor = msg->u.error_event.error_factor;
> - break;
> - case CMD_LOG_MESSAGE:
> - channel = msg->u.log_message.channel;
> - status = msg->u.log_message.data[0];
> - txerr = msg->u.log_message.data[2];
> - rxerr = msg->u.log_message.data[3];
> - error_factor = msg->u.log_message.data[1];
> - break;
> - case CMD_CHIP_STATE_EVENT:
> - channel = msg->u.chip_state_event.channel;
> - status = msg->u.chip_state_event.status;
> - txerr = msg->u.chip_state_event.tx_errors_count;
> - rxerr = msg->u.chip_state_event.rx_errors_count;
> - error_factor = 0;
> - break;
> - default:
> - dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
> - msg->id);
> - return;
> - }
>
> - if (channel >= dev->nchannels) {
> + if (es->channel >= dev->nchannels) {
> dev_err(dev->udev->dev.parent,
> - "Invalid channel number (%d)\n", channel);
> + "Invalid channel number (%d)\n", es->channel);
> return;
> }
>
> - priv = dev->nets[channel];
> + priv = dev->nets[es->channel];
> stats = &priv->netdev->stats;
>
> - if (status & M16C_STATE_BUS_RESET) {
> + if (es->status & M16C_STATE_BUS_RESET) {
> kvaser_usb_unlink_tx_urbs(priv);
> return;
> }
> @@ -675,9 +840,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
>
> new_state = priv->can.state;
>
> - netdev_dbg(priv->netdev, "Error status: 0x%02x\n", status);
> + netdev_dbg(priv->netdev, "Error status: 0x%02x\n", es->status);
>
> - if (status & M16C_STATE_BUS_OFF) {
> + if (es->status & M16C_STATE_BUS_OFF) {
> cf->can_id |= CAN_ERR_BUSOFF;
>
> priv->can.can_stats.bus_off++;
> @@ -687,12 +852,12 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> netif_carrier_off(priv->netdev);
>
> new_state = CAN_STATE_BUS_OFF;
> - } else if (status & M16C_STATE_BUS_PASSIVE) {
> + } else if (es->status & M16C_STATE_BUS_PASSIVE) {
> if (priv->can.state != CAN_STATE_ERROR_PASSIVE) {
> cf->can_id |= CAN_ERR_CRTL;
>
> - if (txerr || rxerr)
> - cf->data[1] = (txerr > rxerr)
> + if (es->txerr || es->rxerr)
> + cf->data[1] = (es->txerr > es->rxerr)
> ? CAN_ERR_CRTL_TX_PASSIVE
> : CAN_ERR_CRTL_RX_PASSIVE;
> else
> @@ -703,13 +868,11 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> }
>
> new_state = CAN_STATE_ERROR_PASSIVE;
> - }
> -
> - if (status == M16C_STATE_BUS_ERROR) {
> + } else if (es->status & M16C_STATE_BUS_ERROR) {
> if ((priv->can.state < CAN_STATE_ERROR_WARNING) &&
> - ((txerr >= 96) || (rxerr >= 96))) {
> + ((es->txerr >= 96) || (es->rxerr >= 96))) {
> cf->can_id |= CAN_ERR_CRTL;
> - cf->data[1] = (txerr > rxerr)
> + cf->data[1] = (es->txerr > es->rxerr)
> ? CAN_ERR_CRTL_TX_WARNING
> : CAN_ERR_CRTL_RX_WARNING;
>
> @@ -723,7 +886,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> }
> }
>
> - if (!status) {
> + if (!es->status) {
> cf->can_id |= CAN_ERR_PROT;
> cf->data[2] = CAN_ERR_PROT_ACTIVE;
>
> @@ -739,34 +902,48 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> priv->can.can_stats.restarts++;
> }
>
> - if (error_factor) {
> - priv->can.can_stats.bus_error++;
> - stats->rx_errors++;
> -
> - cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
> -
> - if (error_factor & M16C_EF_ACKE)
> - cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
> - if (error_factor & M16C_EF_CRCE)
> - cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
> - CAN_ERR_PROT_LOC_CRC_DEL);
> - if (error_factor & M16C_EF_FORME)
> - cf->data[2] |= CAN_ERR_PROT_FORM;
> - if (error_factor & M16C_EF_STFE)
> - cf->data[2] |= CAN_ERR_PROT_STUFF;
> - if (error_factor & M16C_EF_BITE0)
> - cf->data[2] |= CAN_ERR_PROT_BIT0;
> - if (error_factor & M16C_EF_BITE1)
> - cf->data[2] |= CAN_ERR_PROT_BIT1;
> - if (error_factor & M16C_EF_TRE)
> - cf->data[2] |= CAN_ERR_PROT_TX;
> + switch (dev->family) {
> + case KVASER_LEAF:
> + if (es->leaf.error_factor) {
> + priv->can.can_stats.bus_error++;
> + stats->rx_errors++;
> +
> + cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
> +
> + if (es->leaf.error_factor & M16C_EF_ACKE)
> + cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
> + if (es->leaf.error_factor & M16C_EF_CRCE)
> + cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
> + CAN_ERR_PROT_LOC_CRC_DEL);
> + if (es->leaf.error_factor & M16C_EF_FORME)
> + cf->data[2] |= CAN_ERR_PROT_FORM;
> + if (es->leaf.error_factor & M16C_EF_STFE)
> + cf->data[2] |= CAN_ERR_PROT_STUFF;
> + if (es->leaf.error_factor & M16C_EF_BITE0)
> + cf->data[2] |= CAN_ERR_PROT_BIT0;
> + if (es->leaf.error_factor & M16C_EF_BITE1)
> + cf->data[2] |= CAN_ERR_PROT_BIT1;
> + if (es->leaf.error_factor & M16C_EF_TRE)
> + cf->data[2] |= CAN_ERR_PROT_TX;
> + }
> + break;
> + case KVASER_USBCAN:
> + if (es->usbcan.error_state & USBCAN_ERROR_STATE_TX_ERROR)
> + stats->tx_errors++;
> + if (es->usbcan.error_state & USBCAN_ERROR_STATE_RX_ERROR)
> + stats->rx_errors++;
> + if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR) {
> + priv->can.can_stats.bus_error++;
> + cf->can_id |= CAN_ERR_BUSERROR;
> + }
> + break;
> }
>
> - cf->data[6] = txerr;
> - cf->data[7] = rxerr;
> + cf->data[6] = es->txerr;
> + cf->data[7] = es->rxerr;
>
> - priv->bec.txerr = txerr;
> - priv->bec.rxerr = rxerr;
> + priv->bec.txerr = es->txerr;
> + priv->bec.rxerr = es->rxerr;
>
> priv->can.state = new_state;
>
> @@ -775,6 +952,124 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> netif_rx(skb);
> }
>
> +/* For USBCAN, report error to userspace iff the channels's errors counter
> + * has increased, or we're the only channel seeing a bus error state.
> + */
> +static void kvaser_usbcan_conditionally_rx_error(const struct kvaser_usb *dev,
> + struct kvaser_error_summary *es)
const struct kvaser_error_summary *es?
> +{
> + struct kvaser_usb_net_priv *priv;
> + int channel;
> + bool report_error;
> +
> + channel = es->channel;
> + if (channel >= dev->nchannels) {
> + dev_err(dev->udev->dev.parent,
> + "Invalid channel number (%d)\n", channel);
> + return;
> + }
> +
> + priv = dev->nets[channel];
> + report_error = false;
> +
> + if (es->txerr > priv->bec.txerr) {
> + es->usbcan.error_state |= USBCAN_ERROR_STATE_TX_ERROR;
> + report_error = true;
> + }
> + if (es->rxerr > priv->bec.rxerr) {
> + es->usbcan.error_state |= USBCAN_ERROR_STATE_RX_ERROR;
> + report_error = true;
> + }
> + if ((es->status & M16C_STATE_BUS_ERROR) &&
> + !(es->usbcan.other_ch_status & M16C_STATE_BUS_ERROR)) {
> + es->usbcan.error_state |= USBCAN_ERROR_STATE_BUSERROR;
> + report_error = true;
> + }
> +
> + if (report_error)
> + kvaser_usb_rx_error(dev, es);
> +}
> +
> +static void kvaser_usbcan_rx_error(const struct kvaser_usb *dev,
> + const struct kvaser_msg *msg)
> +{
> + struct kvaser_error_summary es = { };
> +
> + switch (msg->id) {
> + /* Sometimes errors are sent as unsolicited chip state events */
> + case CMD_CHIP_STATE_EVENT:
> + es.channel = msg->u.usbcan.chip_state_event.channel;
> + es.status = msg->u.usbcan.chip_state_event.status;
> + es.txerr = msg->u.usbcan.chip_state_event.tx_errors_count;
> + es.rxerr = msg->u.usbcan.chip_state_event.rx_errors_count;
> + kvaser_usbcan_conditionally_rx_error(dev, &es);
> + break;
> +
> + case CMD_CAN_ERROR_EVENT:
> + es.channel = 0;
> + es.status = msg->u.usbcan.error_event.status_ch0;
> + es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch0;
> + es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch0;
> + es.usbcan.other_ch_status =
> + msg->u.usbcan.error_event.status_ch1;
> + kvaser_usbcan_conditionally_rx_error(dev, &es);
> +
> + /* The USBCAN firmware does not support more than 2 channels.
Does USBCAN support always 2 channels or are there models with 1
channels, too. I'd rephrase ..."does support up to 2 channels."
> + * Now that ch0 was checked, check if ch1 has any errors.
> + */
> + if (dev->nchannels == MAX_USBCAN_NET_DEVICES) {
> + es.channel = 1;
> + es.status = msg->u.usbcan.error_event.status_ch1;
> + es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch1;
> + es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch1;
> + es.usbcan.other_ch_status =
> + msg->u.usbcan.error_event.status_ch0;
> + kvaser_usbcan_conditionally_rx_error(dev, &es);
> + }
> + break;
> +
> + default:
> + dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
> + msg->id);
> + }
> +}
> +
> +static void kvaser_leaf_rx_error(const struct kvaser_usb *dev,
> + const struct kvaser_msg *msg)
> +{
> + struct kvaser_error_summary es = { };
> +
> + switch (msg->id) {
> + case CMD_CAN_ERROR_EVENT:
> + es.channel = msg->u.leaf.error_event.channel;
> + es.status = msg->u.leaf.error_event.status;
> + es.txerr = msg->u.leaf.error_event.tx_errors_count;
> + es.rxerr = msg->u.leaf.error_event.rx_errors_count;
> + es.leaf.error_factor = msg->u.leaf.error_event.error_factor;
> + break;
> + case CMD_LEAF_LOG_MESSAGE:
> + es.channel = msg->u.leaf.log_message.channel;
> + es.status = msg->u.leaf.log_message.data[0];
> + es.txerr = msg->u.leaf.log_message.data[2];
> + es.rxerr = msg->u.leaf.log_message.data[3];
> + es.leaf.error_factor = msg->u.leaf.log_message.data[1];
> + break;
> + case CMD_CHIP_STATE_EVENT:
> + es.channel = msg->u.leaf.chip_state_event.channel;
> + es.status = msg->u.leaf.chip_state_event.status;
> + es.txerr = msg->u.leaf.chip_state_event.tx_errors_count;
> + es.rxerr = msg->u.leaf.chip_state_event.rx_errors_count;
> + es.leaf.error_factor = 0;
> + break;
> + default:
> + dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
> + msg->id);
> + return;
> + }
> +
> + kvaser_usb_rx_error(dev, &es);
> +}
> +
> static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
> const struct kvaser_msg *msg)
> {
> @@ -782,16 +1077,16 @@ static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
> struct sk_buff *skb;
> struct net_device_stats *stats = &priv->netdev->stats;
>
> - if (msg->u.rx_can.flag & (MSG_FLAG_ERROR_FRAME |
> + if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
> MSG_FLAG_NERR)) {
> netdev_err(priv->netdev, "Unknow error (flags: 0x%02x)\n",
> - msg->u.rx_can.flag);
> + msg->u.rx_can_header.flag);
>
> stats->rx_errors++;
> return;
> }
>
> - if (msg->u.rx_can.flag & MSG_FLAG_OVERRUN) {
> + if (msg->u.rx_can_header.flag & MSG_FLAG_OVERRUN) {
> stats->rx_over_errors++;
> stats->rx_errors++;
>
> @@ -817,7 +1112,8 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> struct can_frame *cf;
> struct sk_buff *skb;
> struct net_device_stats *stats;
> - u8 channel = msg->u.rx_can.channel;
> + u8 channel = msg->u.rx_can_header.channel;
> + const u8 *rx_msg;
>
> if (channel >= dev->nchannels) {
> dev_err(dev->udev->dev.parent,
> @@ -828,19 +1124,32 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> priv = dev->nets[channel];
> stats = &priv->netdev->stats;
>
> - if ((msg->u.rx_can.flag & MSG_FLAG_ERROR_FRAME) &&
> - (msg->id == CMD_LOG_MESSAGE)) {
> - kvaser_usb_rx_error(dev, msg);
> + if ((msg->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
> + (dev->family == KVASER_LEAF && msg->id == CMD_LEAF_LOG_MESSAGE)) {
> + kvaser_leaf_rx_error(dev, msg);
> return;
> - } else if (msg->u.rx_can.flag & (MSG_FLAG_ERROR_FRAME |
> - MSG_FLAG_NERR |
> - MSG_FLAG_OVERRUN)) {
> + } else if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
> + MSG_FLAG_NERR |
> + MSG_FLAG_OVERRUN)) {
> kvaser_usb_rx_can_err(priv, msg);
> return;
> - } else if (msg->u.rx_can.flag & ~MSG_FLAG_REMOTE_FRAME) {
> + } else if (msg->u.rx_can_header.flag & ~MSG_FLAG_REMOTE_FRAME) {
> netdev_warn(priv->netdev,
> "Unhandled frame (flags: 0x%02x)",
> - msg->u.rx_can.flag);
> + msg->u.rx_can_header.flag);
> + return;
> + }
> +
> + switch (dev->family) {
> + case KVASER_LEAF:
> + rx_msg = msg->u.leaf.rx_can.msg;
> + break;
> + case KVASER_USBCAN:
> + rx_msg = msg->u.usbcan.rx_can.msg;
> + break;
> + default:
> + dev_err(dev->udev->dev.parent,
> + "Invalid device family (%d)\n", dev->family);
> return;
should not happen.
> }
>
> @@ -850,38 +1159,37 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> return;
> }
>
> - if (msg->id == CMD_LOG_MESSAGE) {
> - cf->can_id = le32_to_cpu(msg->u.log_message.id);
> + if (dev->family == KVASER_LEAF && msg->id == CMD_LEAF_LOG_MESSAGE) {
> + cf->can_id = le32_to_cpu(msg->u.leaf.log_message.id);
> if (cf->can_id & KVASER_EXTENDED_FRAME)
> cf->can_id &= CAN_EFF_MASK | CAN_EFF_FLAG;
> else
> cf->can_id &= CAN_SFF_MASK;
>
> - cf->can_dlc = get_can_dlc(msg->u.log_message.dlc);
> + cf->can_dlc = get_can_dlc(msg->u.leaf.log_message.dlc);
>
> - if (msg->u.log_message.flags & MSG_FLAG_REMOTE_FRAME)
> + if (msg->u.leaf.log_message.flags & MSG_FLAG_REMOTE_FRAME)
> cf->can_id |= CAN_RTR_FLAG;
> else
> - memcpy(cf->data, &msg->u.log_message.data,
> + memcpy(cf->data, &msg->u.leaf.log_message.data,
> cf->can_dlc);
> } else {
> - cf->can_id = ((msg->u.rx_can.msg[0] & 0x1f) << 6) |
> - (msg->u.rx_can.msg[1] & 0x3f);
> + cf->can_id = ((rx_msg[0] & 0x1f) << 6) | (rx_msg[1] & 0x3f);
>
> if (msg->id == CMD_RX_EXT_MESSAGE) {
> cf->can_id <<= 18;
> - cf->can_id |= ((msg->u.rx_can.msg[2] & 0x0f) << 14) |
> - ((msg->u.rx_can.msg[3] & 0xff) << 6) |
> - (msg->u.rx_can.msg[4] & 0x3f);
> + cf->can_id |= ((rx_msg[2] & 0x0f) << 14) |
> + ((rx_msg[3] & 0xff) << 6) |
> + (rx_msg[4] & 0x3f);
> cf->can_id |= CAN_EFF_FLAG;
> }
>
> - cf->can_dlc = get_can_dlc(msg->u.rx_can.msg[5]);
> + cf->can_dlc = get_can_dlc(rx_msg[5]);
>
> - if (msg->u.rx_can.flag & MSG_FLAG_REMOTE_FRAME)
> + if (msg->u.rx_can_header.flag & MSG_FLAG_REMOTE_FRAME)
> cf->can_id |= CAN_RTR_FLAG;
> else
> - memcpy(cf->data, &msg->u.rx_can.msg[6],
> + memcpy(cf->data, &rx_msg[6],
> cf->can_dlc);
> }
>
> @@ -944,21 +1252,35 @@ static void kvaser_usb_handle_message(const struct kvaser_usb *dev,
>
> case CMD_RX_STD_MESSAGE:
> case CMD_RX_EXT_MESSAGE:
> - case CMD_LOG_MESSAGE:
> + kvaser_usb_rx_can_msg(dev, msg);
> + break;
> +
> + case CMD_LEAF_LOG_MESSAGE:
> + if (dev->family != KVASER_LEAF)
> + goto warn;
> kvaser_usb_rx_can_msg(dev, msg);
> break;
>
> case CMD_CHIP_STATE_EVENT:
> case CMD_CAN_ERROR_EVENT:
> - kvaser_usb_rx_error(dev, msg);
> + if (dev->family == KVASER_LEAF)
> + kvaser_leaf_rx_error(dev, msg);
> + else
> + kvaser_usbcan_rx_error(dev, msg);
> break;
>
> case CMD_TX_ACKNOWLEDGE:
> kvaser_usb_tx_acknowledge(dev, msg);
> break;
>
> + /* Ignored messages */
> + case CMD_USBCAN_CLOCK_OVERFLOW_EVENT:
> + if (dev->family != KVASER_USBCAN)
> + goto warn;
> + break;
> +
> default:
> - dev_warn(dev->udev->dev.parent,
> +warn: dev_warn(dev->udev->dev.parent,
> "Unhandled message (%d)\n", msg->id);
> break;
> }
> @@ -1178,7 +1500,7 @@ static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev)
> dev->rxbuf[i],
> dev->rxbuf_dma[i]);
>
> - for (i = 0; i < MAX_NET_DEVICES; i++) {
> + for (i = 0; i < dev->nchannels; i++) {
> struct kvaser_usb_net_priv *priv = dev->nets[i];
>
> if (priv)
> @@ -1286,6 +1608,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
> struct kvaser_msg *msg;
> int i, err;
> int ret = NETDEV_TX_OK;
> + uint8_t *msg_tx_can_flags;
u8 please
>
> if (can_dropped_invalid_skb(netdev, skb))
> return NETDEV_TX_OK;
> @@ -1307,9 +1630,23 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
>
> msg = buf;
> msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_tx_can);
> - msg->u.tx_can.flags = 0;
> msg->u.tx_can.channel = priv->channel;
>
> + switch (dev->family) {
> + case KVASER_LEAF:
> + msg_tx_can_flags = &msg->u.tx_can.leaf.flags;
> + break;
> + case KVASER_USBCAN:
> + msg_tx_can_flags = &msg->u.tx_can.usbcan.flags;
> + break;
> + default:
> + dev_err(dev->udev->dev.parent,
> + "Invalid device family (%d)\n", dev->family);
> + goto releasebuf;
should not happen, please remove
> + }
> +
> + *msg_tx_can_flags = 0;
> +
> if (cf->can_id & CAN_EFF_FLAG) {
> msg->id = CMD_TX_EXT_MESSAGE;
> msg->u.tx_can.msg[0] = (cf->can_id >> 24) & 0x1f;
> @@ -1327,7 +1664,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
> memcpy(&msg->u.tx_can.msg[6], cf->data, cf->can_dlc);
>
> if (cf->can_id & CAN_RTR_FLAG)
> - msg->u.tx_can.flags |= MSG_FLAG_REMOTE_FRAME;
> + *msg_tx_can_flags |= MSG_FLAG_REMOTE_FRAME;
>
> for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++) {
> if (priv->tx_contexts[i].echo_index == MAX_TX_URBS) {
> @@ -1596,6 +1933,17 @@ static int kvaser_usb_probe(struct usb_interface *intf,
> if (!dev)
> return -ENOMEM;
>
> + if (kvaser_is_leaf(id)) {
> + dev->family = KVASER_LEAF;
> + } else if (kvaser_is_usbcan(id)) {
> + dev->family = KVASER_USBCAN;
> + } else {
> + dev_err(&intf->dev,
> + "Product ID (%d) does not belong to any known Kvaser USB family",
> + id->idProduct);
> + return -ENODEV;
> + }
> +
> err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out);
> if (err) {
> dev_err(&intf->dev, "Cannot get usb endpoint(s)");
>
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply
* RE: /proc/net/dev regression
From: David Laight @ 2015-01-12 11:47 UTC (permalink / raw)
To: 'Al Viro', Carlos R. Mafra
Cc: LKML, Hauke Mehrtens, John W. Linville, netdev@vger.kernel.org
In-Reply-To: <20150111013913.GE22149@ZenIV.linux.org.uk>
From: Al Viro
> > I think the problem with wmnet is not that it was expecting the fields
> > to be aligned because it never had problems before (when definitely more
> > than 10 megabytes were received, wmnet is crappy but not _that_ crappy).
> >
> > I think the problem really was here,
> >
> > totalbytes_in = strtoul(&buffer[7], NULL, 10);
> >
> > After the patch the device name is 8 characters long and &buffer[7]
> > overlaps with the name instead of reading the bytes. Before the
> > patch is was fine because the call to strtoul() seems correct in the
> > sense that it would read everything until the NULL. So more than 10
> > megabytes was still ok.
> >
> > So I guess I was wrong when suggesting that the problem was the
> > alignment.
>
> Several lines below there's this:
> totalpackets_out = strtoul(&buffer[74], NULL, 10);
> if (totalpackets_out != lastpackets_out) {
> totalbytes_out = strtoul(&buffer[66], NULL, 10);
> diffpackets_out += totalpackets_out - lastpackets_out;
> diffbytes_out += totalbytes_out - lastbytes_out;
> lastpackets_out = totalpackets_out;
> lastbytes_out = totalbytes_out;
> tx = True;
> }
>
> So I'm afraid it *is* that crappy. This function really should use scanf();
> note that updateStats_ipchains() in the same file does just that (well,
> fgets()+sscanf() for fsck knows what reason). And I'd be careful with all
> those %d, actually - it's not _that_ hard to get more than 4Gb sent.
> scanf formats really ought to match the kernel-side (seq_)printf one...
IMHO it is safer to use strchr(p, ' '); to skip the interface name and then
use repeated calls to strtoull() to read the numbers.
Correctly/safely using scanf() is really too hard.
David
^ permalink raw reply
* Re: [PATCH net] ipv6: Prevent ipv6_find_hdr() from returning ENOENT for valid non-first fragments
From: Pablo Neira Ayuso @ 2015-01-12 11:51 UTC (permalink / raw)
To: Rahul Sharma; +Cc: Hannes Frederic Sowa, netdev, linux-kernel, netfilter-devel
In-Reply-To: <CAFB3abwetkNa4753g9d-z8xU0fMThyApuESQNkbxGa8Yz2YRfQ@mail.gmail.com>
On Mon, Jan 12, 2015 at 04:38:16PM +0530, Rahul Sharma wrote:
> Hi Pablo, Hannes
>
> On Fri, Jan 9, 2015 at 9:20 PM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > On Fr, 2015-01-09 at 12:45 +0100, Pablo Neira Ayuso wrote:
> >> Hi Hannes,
> >>
> >> On Fri, Jan 09, 2015 at 12:34:15PM +0100, Hannes Frederic Sowa wrote:
> >> > On Fri, Jan 9, 2015, at 08:18, Rahul Sharma wrote:
> >> > > Hi Pablo,
> >> > >
> >> > > On Fri, Jan 9, 2015 at 5:35 AM, Pablo Neira Ayuso <pablo@netfilter.org>
> >> > > wrote:
> >> > > > On Thu, Jan 08, 2015 at 11:39:16PM +0100, Hannes Frederic Sowa wrote:
> >> > > >> Hi Pablo,
> >> > > >>
> >> > > >> On Thu, Jan 8, 2015, at 21:53, Pablo Neira Ayuso wrote:
> >> > > >> > I'm afraid we cannot just get rid of that !ipv6_ext_hdr() check. The
> >> > > >> > ipv6_find_hdr() function is designed to return the transport protocol.
> >> > > >> > After the proposed change, it will return extension header numbers.
> >> > > >> > This will break existing ip6tables rulesets since the `-p' option
> >> > > >> > relies on this function to match the transport protocol.
> >> > > >> >
> >> > > >> > Note that the AH header is skipped (see code a bit below this
> >> > > >> > problematic fragmentation handling) so the follow up header after the
> >> > > >> > AH header is returned as the transport header.
> >> > > >> >
> >> > > >> > We can probably return the AH protocol number for non-1st fragments.
> >> > > >> > However, that would be something new to ip6tables since nobody has
> >> > > >> > ever seen packet matching `-p ah' rules. Thus, we restore control to
> >> > > >> > the user to allow this, but we would accept all kind of fragmented AH
> >> > > >> > traffic through the firewall since we cannot know what transport
> >> > > >> > protocol contains from non-1st fragments (unless I'm missing anything,
> >> > > >> > I need to have a closer look at this again tomorrow with fresher
> >> > > >> > mind).
> >> > > >>
> >> > > >> The code in question is guarded by (_frag_off != 0), so we are
> >> > > >> definitely processing a non-1st fragment currently. The -p match would
> >> > > >> happen at the time when the packet is reassembled and thus ipv6_find_hdr
> >> > > >> will find the real transport (final) header at this point (I hope I
> >> > > >> followed the code correctly here).
> >> > > >
> >> > > > Then, Rahul should get things working by modprobing nf_defrag_ipv6.
> >> > >
> >> > > I already had nf_defrag_ipv6 installed when the issue occured. But I
> >> > > see ip6table_raw_hook returning NF_DROP for the second fragment.
> >> >
> >> > That's what I expected. I think the change only affects hooks before
> >> > reassembly.
> >>
> >> reassembly happens at NF_IP6_PRI_CONNTRACK_DEFRAG (-400), so that
> >> happens before NF_IP6_PRI_RAW (-300) in IPv6 which is where the raw
> >> table is placed.
> >
> > I tried to reproduce it, but couldn't get non-1st fragments getting
> > dropped during traversal of the raw table. They get dropped earlier at
> > during reassembly or pass.
> >
> > I agree with Pablo, I also would like to see more data.
> >
> > Thanks,
> > Hannes
> >
> >
>
> I enabled pr_debug() and there was no error in nf_ct_frag6_gather().
> It seems to have defragmented the packet correctly. As expected,
> ipv6_defrag() returns NF_STOLEN for the first packet after queuing it.
> For the next fragment, ipv6_defrag() calls nf_ct_frag6_output() after
> after reassembling it.
nf_ct_frag6_output() doesn't exist anymore. You're using an old
kernel, you should have started by telling so in your report.
See 6aafeef ("netfilter: push reasm skb through instead of original
frag skbs").
^ permalink raw reply
* Re: [PATCH v3 4/4] can: kvaser_usb: Add support for the Usbcan-II family
From: Marc Kleine-Budde @ 2015-01-12 11:51 UTC (permalink / raw)
To: Ahmed S. Darwish
Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20150108151901.GA11398@vivalin-002>
[-- Attachment #1: Type: text/plain, Size: 1559 bytes --]
On 01/08/2015 04:19 PM, Ahmed S. Darwish wrote:
[...]
>>> MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
>>> @@ -463,7 +631,18 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
>>> if (err)
>>> return err;
>>>
>>> - dev->fw_version = le32_to_cpu(msg.u.softinfo.fw_version);
>>> + switch (dev->family) {
>>> + case KVASER_LEAF:
>>> + dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
>>> + break;
>>> + case KVASER_USBCAN:
>>> + dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
>>> + break;
>>> + default:
>>> + dev_err(dev->udev->dev.parent,
>>> + "Invalid device family (%d)\n", dev->family);
>>> + return -EINVAL;
>>
>> The default case should not happen. I think you can remove it.
> It's true, it _should_ never happen. But I only add such checks if
> the follow-up code critically depends on a certain `dev->family`
> behavior. So it's kind of a defensive check against any possible
> bug in driver or memory.
>
> What do you think?
The kernel is full of callback functions, if you have a bit flip there
you're in trouble anyways. A bug in the driver (or other parts of the
kernel) might overwrite the memory of dev->family, but if this happens,
more things will break.
Marc
--
Pengutronix e.K. | Marc Kleine-Budde |
Industrial Linux Solutions | Phone: +49-231-2826-924 |
Vertretung West/Dortmund | Fax: +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686 | http://www.pengutronix.de |
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]
^ permalink raw reply
* RE: [PATCH net-next v2] tcp: avoid reducing cwnd when ACK+DSACK is received
From: David Laight @ 2015-01-12 11:52 UTC (permalink / raw)
To: 'Yuchung Cheng', Eric Dumazet
Cc: Neal Cardwell, Sébastien Barré, David Miller, Netdev,
Gregory Detal, Nandita Dukkipati
In-Reply-To: <CAK6E8=dhW=0hRYHjjgBh0KwKOQfzHhwY2egusQ8S4tsoJb0Nsg@mail.gmail.com>
From: Yuchung Cheng
> Sent: 09 January 2015 19:44
...
> Sebastien: I suggest breaking down by ACK types for readability. e.g.,
>
> /* This routine deals with acks during a TLP episode.
> * We mark the end of a TLP episode on receiving TLP dupack or when
> * ack is after tlp_high_seq.
> * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe.
> */
> static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
> {
> struct tcp_sock *tp = tcp_sk(sk);
>
> if (before(ack, tp->tlp_high_seq))
> return;
>
> if (flag & FLAG_DSACKING_ACK) {
> /* This DSACK means original and TLP probe arrived; no loss */
> tp->tlp_high_seq = 0;
I think I'd add a 'return' here.
> } else if (after(ack, tp->tlp_high_seq)) {
> /* ACK advances: there was a loss, so reduce cwnd. Reset
> * tlp_high_seq in tcp_init_cwnd_reduction()
> */
> tcp_init_cwnd_reduction(sk);
> tcp_set_ca_state(sk, TCP_CA_CWR);
> tcp_end_cwnd_reduction(sk);
> tcp_try_keep_open(sk);
> NET_INC_STATS_BH(sock_net(sk),
> LINUX_MIB_TCPLOSSPROBERECOVERY);
and here
> } else if (!(flag & (FLAG_SND_UNA_ADVANCED |
> FLAG_NOT_DUP | FLAG_DATA_SACKED))) {
> /* Pure dupack: original and TLP probe arrived; no loss */
> tp->tlp_high_seq = 0;
> }
> }
David
^ permalink raw reply
* Re: [PATCH v4 3/4] can: kvaser_usb: Add support for the Usbcan-II family
From: Ahmed S. Darwish @ 2015-01-12 12:07 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <54B3B37C.7090002@pengutronix.de>
Hi Marc,
On Mon, Jan 12, 2015 at 12:43:56PM +0100, Marc Kleine-Budde wrote:
> On 01/11/2015 09:36 PM, Ahmed S. Darwish wrote:
[...]
> > diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
> > index 0eb870b..da47d17 100644
> > --- a/drivers/net/can/usb/kvaser_usb.c
> > +++ b/drivers/net/can/usb/kvaser_usb.c
> > @@ -6,10 +6,12 @@
> > * Parts of this driver are based on the following:
> > * - Kvaser linux leaf driver (version 4.78)
> > * - CAN driver for esd CAN-USB/2
> > + * - Kvaser linux usbcanII driver (version 5.3)
> > *
> > * Copyright (C) 2002-2006 KVASER AB, Sweden. All rights reserved.
> > * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
> > * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
> > + * Copyright (C) 2015 Valeo Corporation
> > */
> >
> > #include <linux/completion.h>
> > @@ -21,6 +23,15 @@
> > #include <linux/can/dev.h>
> > #include <linux/can/error.h>
> >
> > +/* Kvaser USB CAN dongles are divided into two major families:
> > + * - Leaf: Based on Renesas M32C, running firmware labeled as 'filo'
> > + * - UsbcanII: Based on Renesas M16C, running firmware labeled as 'helios'
> > + */
> > +enum kvaser_usb_family {
> > + KVASER_LEAF,
> > + KVASER_USBCAN,
> > +};
>
> Nitpick: please move after the #defines
>
Will do.
[...]
> > @@ -616,53 +810,24 @@ static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
> > }
> >
> > static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > - const struct kvaser_msg *msg)
> > + struct kvaser_error_summary *es)
>
> Can you make "struct kvaser_error_summary *es" const?
>
Sure.
[...]
> > +/* For USBCAN, report error to userspace iff the channels's errors counter
> > + * has increased, or we're the only channel seeing a bus error state.
> > + */
> > +static void kvaser_usbcan_conditionally_rx_error(const struct kvaser_usb *dev,
> > + struct kvaser_error_summary *es)
>
> const struct kvaser_error_summary *es?
>
Ditto.
[...]
> > +
> > + /* The USBCAN firmware does not support more than 2 channels.
>
> Does USBCAN support always 2 channels or are there models with 1
> channels, too. I'd rephrase ..."does support up to 2 channels."
>
Yup, that will be more accurate.
[...]
> > +
> > + switch (dev->family) {
> > + case KVASER_LEAF:
> > + rx_msg = msg->u.leaf.rx_can.msg;
> > + break;
> > + case KVASER_USBCAN:
> > + rx_msg = msg->u.usbcan.rx_can.msg;
> > + break;
> > + default:
> > + dev_err(dev->udev->dev.parent,
> > + "Invalid device family (%d)\n", dev->family);
> > return;
>
> should not happen.
>
Yes, but otherwise I get GCC warnings of 'rx_msg' possibly
being unused. I can add __maybe_unused to rx_msg of course,
but such annotation may hide possible errors in the future.
> > + switch (dev->family) {
> > + case KVASER_LEAF:
> > + msg_tx_can_flags = &msg->u.tx_can.leaf.flags;
> > + break;
> > + case KVASER_USBCAN:
> > + msg_tx_can_flags = &msg->u.tx_can.usbcan.flags;
> > + break;
> > + default:
> > + dev_err(dev->udev->dev.parent,
> > + "Invalid device family (%d)\n", dev->family);
> > + goto releasebuf;
> should not happen, please remove
Like the 'rx_msg' case above.
Thanks,
Darwish
^ permalink raw reply
* Re: Clarification regarding IFLA_BRPORT_LEARNING_SYNC and aging of fdb entries learnt via br_fdb_external_learn_add()
From: B Viswanath @ 2015-01-12 12:24 UTC (permalink / raw)
To: Arad, Ronen; +Cc: Scott Feldman, Netdev, Jiri Pirko, Siva Mannem
In-Reply-To: <E4CD12F19ABA0C4D8729E087A761DC3505DDAD7E@ORSMSX101.amr.corp.intel.com>
On 12 January 2015 at 16:30, Arad, Ronen <ronen.arad@intel.com> wrote:
>
>
>>-----Original Message-----
>>From: Scott Feldman [mailto:sfeldma@gmail.com]
>>Sent: Saturday, January 10, 2015 10:29 PM
>>To: Arad, Ronen
>>Cc: Netdev; Jiri Pirko; Siva Mannem; marichika4@gmail.com
>>Subject: Re: Clarification regarding IFLA_BRPORT_LEARNING_SYNC and aging of
>>fdb entries learnt via br_fdb_external_learn_add()
>>
>>Perfect, I think we have a good working set of use-cases. Thanks for
>>adding those. Your case 3 seems do-able without too much work since
>>we already know which ones where externally added, just need another
>>per-bridge-port flag to turn off bridging aging of externally learned
>>entries. This will address the performance issue you (and B
>>Viswanath) raised.
>> What about the entry stats, from the user's
>>'bridge -s fdb show" perspective for the bridge fdb entries? Will
>>these numbers match expectations? I think case 1 and case 4 provide a
>>coherent stats view. Case 3 seems to be lacking in this regard.
>>
> I think that statistics accuracy is orthogonal to the mechanism used for aging
> in all the cases where learning sync is enabled (i.e. cases 1, 3, 4).
> Accuracy only depends on how frequent the switch port driver notifies the
> bridge FDB. The best accuracy achievable is with updates once per-second. At
> the other extreme the switch port driver does not refresh entries. It only
> notifies the bridge after entries are removed from the HW. In this extreme case
> the statistics will really show the time since an entry was first learned and
> not the time since it was last re-learned in HW.
> Switch port driver could pick some
> acceptable rate to update the bridge module. Within a typical 5 minutes aging
> interval, updates every 10 seconds or every 30 seconds could be a reasonable
> tradeoff between statistics accuracy and system overhead.
>
I would also like to add that 'whether the end user will really be
interested in the FDB, (and the hit notifications from silicon)'
depends on the actual deployment. In a L3 dominant deployment, people
may not care much about FDB at all. So any time spent inside the
driver trying to update the FDB for statistical reasons will be a
waste of CPU time. On the other hand for an Access switch (or a
Controller) deployment, FDB may really be useful. Unfortunately, the
driver will not be able to guess the deployment.
I am thinking aloud here, but may be we can let this update-timer to
be a configurable item with default being current behaviour
(determined by driver). It serves both purposes.
Thanks
Vissu
>>On Fri, Jan 9, 2015 at 10:51 PM, Arad, Ronen <ronen.arad@intel.com> wrote:
>>> [...]
>>>>> It is indeed simpler. However, if the overhead of reading hit bits from
>>the
>>>>HW
>>>>> and updating freshness of entries using br_fdb_external_learn_add() is too
>>>>> expensive, it would force such platforms to disable learning sync
>>>>altogether.
>>>>> Therefore, I believe aging offload flag (could be sufficient at bridge
>>>>level)
>>>>> and external aging interval (possibly longer than the software aging
>>>>interval)
>>>>> will encourage drivers to use leaning sync.
>>>>>>-scott
>>>>
>>>>I'm not sure I follow that last part.
>>>>
>>>>Can we list out the use-cases to see what's missing?
>>>>
>>>>Case 1: bridge ages out learned_sync'ed entries
>>>>
>>>>bridge port learning: off
>>>>offload port learning: on
>>>>offload port learning_sync: on
>>>>
>>>>Driver calls br_fdb_external_learn_add() periodically to refresh
>>>>bridge fdb entry
>>>>to keep it from going stale.
>>>>Bridge removes aged out fdb entries (and indirectly tells offload
>>>>device to do the same).
>>>>
>>>>Case 2: offload device/bridge age out entries independently
>>>>
>>>>bridge port learning: on
>>>>offload port learning: on
>>>>offload port learning_sync: off
>>>>
>>>>Bridge ages out its stale learned entries, independent of offload device.
>>>>Offload device ages out its stale learned entries, independent of bridge.
>>>>
>>>>Case 3: ?
>>>>
>>>>Please help me finish the use-case list so we can see what's missing.
>>>
>>>
>>> Case 3: offload device ages out external entries and notifies bridge
>>>
>>> bridge port learning: on or off (Bridge only learns from packets seen
>>(Rx/Tx))
>>> offload port learning: on
>>> offload port learning_sync: on
>>> bridge aging of external learn: off
>>> offload device aging: on
>>>
>>> Switch port/device driver ages entries (could be by HW aging or soft aging
>>in
>>> driver/firmware),
>>> notifies bridge about aged entries using br_fdb_externallearn_del().
>>> This allows efficient HW aging and batched notification at a pace
>>independent
>>> of bridge aging interval.
>>> User still enjoys a single VLAN-aware FDB within the bridge module and
>>having
>>> all entries in one place. Externally learned entries are identified as such
>>by
>>> iproute2 "bridge fdb show" command. Device does not have to implement
>>> ndo_bridge_fdb_dump() for each offload port as the bridge module provides it
>>> for the common FDB.
>>>
>>> Case 4: bridge ages owned and external entries at different intervals
>>>
>>> bridge port learning: on (Effectively only for Rx/Tx traffic seen by
>>> software bridge)
>>> offload port learning: on (transient traffic and RxTx, overlap with bridge
>>> learned entries possible)
>>> offload port learning_sync: on
>>> bridge aging of external learn: on
>>> offload device aging: off
>>> bridge aging interval for owned entries: T1
>>> bridge aging interval for external entries: T2 (Typically T2 > T1)
>>>
>>> This allows for fine-tuning the overhead of periodic updates of entries
>>> freshness from offload port device.
>>>
>>> The bottom line of cases 3-4 is that it is desirable to use the common
>>bridge
>>> FDB as long as bridge aging of externally learned entries could be
>>controlled
>>> by the offload device: Either be at a longer interval or disabled.
>>>
>>>>
>>>>-scott
>>>>--
>>>>To unsubscribe from this list: send the line "unsubscribe netdev" in
>>>>the body of a message to majordomo@vger.kernel.org
>>>>More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH v3 4/4] can: kvaser_usb: Add support for the Usbcan-II family
From: Ahmed S. Darwish @ 2015-01-12 12:26 UTC (permalink / raw)
To: Marc Kleine-Budde
Cc: Olivier Sobrie, Oliver Hartkopp, Wolfgang Grandegger,
David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <54B3B555.5010804@pengutronix.de>
On Mon, Jan 12, 2015 at 12:51:49PM +0100, Marc Kleine-Budde wrote:
> On 01/08/2015 04:19 PM, Ahmed S. Darwish wrote:
>
> [...]
>
> >>> MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
> >>> @@ -463,7 +631,18 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
> >>> if (err)
> >>> return err;
> >>>
> >>> - dev->fw_version = le32_to_cpu(msg.u.softinfo.fw_version);
> >>> + switch (dev->family) {
> >>> + case KVASER_LEAF:
> >>> + dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
> >>> + break;
> >>> + case KVASER_USBCAN:
> >>> + dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
> >>> + break;
> >>> + default:
> >>> + dev_err(dev->udev->dev.parent,
> >>> + "Invalid device family (%d)\n", dev->family);
> >>> + return -EINVAL;
> >>
> >> The default case should not happen. I think you can remove it.
>
> > It's true, it _should_ never happen. But I only add such checks if
> > the follow-up code critically depends on a certain `dev->family`
> > behavior. So it's kind of a defensive check against any possible
> > bug in driver or memory.
> >
> > What do you think?
>
> The kernel is full of callback functions, if you have a bit flip there
> you're in trouble anyways. A bug in the driver (or other parts of the
> kernel) might overwrite the memory of dev->family, but if this happens,
> more things will break.
>
I see. Thanks for the explanation.
Most of them are now removed in latest submission, except two to
avoid GCC warnings of variables that "may be used uninitialized".
Regards,
Darwish
^ permalink raw reply
* [PATCH 5/6] openvswitch: Allow for any level of nesting in flow attributes
From: Thomas Graf @ 2015-01-12 12:26 UTC (permalink / raw)
To: davem-fT/PcQaiUtIeIZ0/mPfg9Q, jesse-l0M0P4e3n4LQT0dZR+AlfA,
stephen-OTpzqLSitTUnbdJkjeBofR2eb7JE58TQ,
pshelar-l0M0P4e3n4LQT0dZR+AlfA, therbert-hpIqsD4AKlfQT0dZR+AlfA,
alexei.starovoitov-Re5JQEeQqe8AvxtiuMwx3w
Cc: dev-yBygre7rU0TnMu66kgdUjQ, netdev-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <cover.1421064100.git.tgraf-G/eBtMaohhA@public.gmane.org>
nlattr_set() is currently hardcoded to two levels of nesting. This change
introduces struct ovs_len_tbl to define minimal length requirements plus
next level nesting tables to traverse the key attributes to arbitary depth.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
v2->v3:
- No change
v1->v2:
- New patch to allow nested Netlink attributes inside
OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
net/openvswitch/flow_netlink.c | 106 ++++++++++++++++++++++-------------------
1 file changed, 56 insertions(+), 50 deletions(-)
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index 8980d32..457ccf3 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -50,6 +50,13 @@
#include "flow_netlink.h"
+struct ovs_len_tbl {
+ int len;
+ const struct ovs_len_tbl *next;
+};
+
+#define OVS_ATTR_NESTED -1
+
static void update_range(struct sw_flow_match *match,
size_t offset, size_t size, bool is_mask)
{
@@ -289,29 +296,44 @@ size_t ovs_key_attr_size(void)
+ nla_total_size(28); /* OVS_KEY_ATTR_ND */
}
+static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
+ [OVS_TUNNEL_KEY_ATTR_ID] = { .len = sizeof(u64) },
+ [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = { .len = sizeof(u32) },
+ [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = { .len = sizeof(u32) },
+ [OVS_TUNNEL_KEY_ATTR_TOS] = { .len = 1 },
+ [OVS_TUNNEL_KEY_ATTR_TTL] = { .len = 1 },
+ [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_CSUM] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_TP_SRC] = { .len = sizeof(u16) },
+ [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) },
+ [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 },
+ [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED },
+};
+
/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */
-static const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
- [OVS_KEY_ATTR_ENCAP] = -1,
- [OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
- [OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
- [OVS_KEY_ATTR_SKB_MARK] = sizeof(u32),
- [OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
- [OVS_KEY_ATTR_VLAN] = sizeof(__be16),
- [OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
- [OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
- [OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
- [OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
- [OVS_KEY_ATTR_TCP_FLAGS] = sizeof(__be16),
- [OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
- [OVS_KEY_ATTR_SCTP] = sizeof(struct ovs_key_sctp),
- [OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
- [OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
- [OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
- [OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
- [OVS_KEY_ATTR_RECIRC_ID] = sizeof(u32),
- [OVS_KEY_ATTR_DP_HASH] = sizeof(u32),
- [OVS_KEY_ATTR_TUNNEL] = -1,
- [OVS_KEY_ATTR_MPLS] = sizeof(struct ovs_key_mpls),
+static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+ [OVS_KEY_ATTR_ENCAP] = { .len = OVS_ATTR_NESTED },
+ [OVS_KEY_ATTR_PRIORITY] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_IN_PORT] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_SKB_MARK] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_ETHERNET] = { .len = sizeof(struct ovs_key_ethernet) },
+ [OVS_KEY_ATTR_VLAN] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_IPV4] = { .len = sizeof(struct ovs_key_ipv4) },
+ [OVS_KEY_ATTR_IPV6] = { .len = sizeof(struct ovs_key_ipv6) },
+ [OVS_KEY_ATTR_TCP] = { .len = sizeof(struct ovs_key_tcp) },
+ [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
+ [OVS_KEY_ATTR_UDP] = { .len = sizeof(struct ovs_key_udp) },
+ [OVS_KEY_ATTR_SCTP] = { .len = sizeof(struct ovs_key_sctp) },
+ [OVS_KEY_ATTR_ICMP] = { .len = sizeof(struct ovs_key_icmp) },
+ [OVS_KEY_ATTR_ICMPV6] = { .len = sizeof(struct ovs_key_icmpv6) },
+ [OVS_KEY_ATTR_ARP] = { .len = sizeof(struct ovs_key_arp) },
+ [OVS_KEY_ATTR_ND] = { .len = sizeof(struct ovs_key_nd) },
+ [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_DP_HASH] = { .len = sizeof(u32) },
+ [OVS_KEY_ATTR_TUNNEL] = { .len = OVS_ATTR_NESTED,
+ .next = ovs_tunnel_key_lens, },
+ [OVS_KEY_ATTR_MPLS] = { .len = sizeof(struct ovs_key_mpls) },
};
static bool is_all_zero(const u8 *fp, size_t size)
@@ -352,8 +374,8 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
return -EINVAL;
}
- expected_len = ovs_key_lens[type];
- if (nla_len(nla) != expected_len && expected_len != -1) {
+ expected_len = ovs_key_lens[type].len;
+ if (nla_len(nla) != expected_len && expected_len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
type, nla_len(nla), expected_len);
return -EINVAL;
@@ -451,30 +473,16 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr,
int type = nla_type(a);
int err;
- static const u32 ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
- [OVS_TUNNEL_KEY_ATTR_ID] = sizeof(u64),
- [OVS_TUNNEL_KEY_ATTR_IPV4_SRC] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_IPV4_DST] = sizeof(u32),
- [OVS_TUNNEL_KEY_ATTR_TOS] = 1,
- [OVS_TUNNEL_KEY_ATTR_TTL] = 1,
- [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = 0,
- [OVS_TUNNEL_KEY_ATTR_CSUM] = 0,
- [OVS_TUNNEL_KEY_ATTR_TP_SRC] = sizeof(u16),
- [OVS_TUNNEL_KEY_ATTR_TP_DST] = sizeof(u16),
- [OVS_TUNNEL_KEY_ATTR_OAM] = 0,
- [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = -1,
- };
-
if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
OVS_NLERR(log, "Tunnel attr %d out of range max %d",
type, OVS_TUNNEL_KEY_ATTR_MAX);
return -EINVAL;
}
- if (ovs_tunnel_key_lens[type] != nla_len(a) &&
- ovs_tunnel_key_lens[type] != -1) {
+ if (ovs_tunnel_key_lens[type].len != nla_len(a) &&
+ ovs_tunnel_key_lens[type].len != OVS_ATTR_NESTED) {
OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
- type, nla_len(a), ovs_tunnel_key_lens[type]);
+ type, nla_len(a), ovs_tunnel_key_lens[type].len);
return -EINVAL;
}
@@ -912,18 +920,16 @@ static int ovs_key_from_nlattrs(struct sw_flow_match *match, u64 attrs,
return 0;
}
-static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
+static void nlattr_set(struct nlattr *attr, u8 val,
+ const struct ovs_len_tbl *tbl)
{
struct nlattr *nla;
int rem;
/* The nlattr stream should already have been validated */
nla_for_each_nested(nla, attr, rem) {
- /* We assume that ovs_key_lens[type] == -1 means that type is a
- * nested attribute
- */
- if (is_attr_mask_key && ovs_key_lens[nla_type(nla)] == -1)
- nlattr_set(nla, val, false);
+ if (tbl && tbl[nla_type(nla)].len == OVS_ATTR_NESTED)
+ nlattr_set(nla, val, tbl[nla_type(nla)].next);
else
memset(nla_data(nla), val, nla_len(nla));
}
@@ -931,7 +937,7 @@ static void nlattr_set(struct nlattr *attr, u8 val, bool is_attr_mask_key)
static void mask_set_nlattr(struct nlattr *attr, u8 val)
{
- nlattr_set(attr, val, true);
+ nlattr_set(attr, val, ovs_key_lens);
}
/**
@@ -1628,8 +1634,8 @@ static int validate_set(const struct nlattr *a,
return -EINVAL;
if (key_type > OVS_KEY_ATTR_MAX ||
- (ovs_key_lens[key_type] != nla_len(ovs_key) &&
- ovs_key_lens[key_type] != -1))
+ (ovs_key_lens[key_type].len != nla_len(ovs_key) &&
+ ovs_key_lens[key_type].len != OVS_ATTR_NESTED))
return -EINVAL;
switch (key_type) {
--
1.9.3
_______________________________________________
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev
^ permalink raw reply related
* [PATCH 0/6 net-next v3] VXLAN Group Policy Extension
From: Thomas Graf @ 2015-01-12 12:26 UTC (permalink / raw)
To: davem, jesse, stephen, pshelar, therbert, alexei.starovoitov; +Cc: dev, netdev
Implements supports for the Group Policy VXLAN extension [0] to provide
a lightweight and simple security label mechanism across network peers
based on VXLAN. The security context and associated metadata is mapped
to/from skb->mark. This allows further mapping to a SELinux context
using SECMARK, to implement ACLs directly with nftables, iptables, OVS,
tc, etc.
The extension is disabled by default and should be run on a distinct
port in mixed Linux VXLAN VTEP environments. Liberal VXLAN VTEPs
which ignore unknown reserved bits will be able to receive VXLAN-GBP
frames.
Simple usage example:
10.1.1.1:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.2 gbp
# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200
10.1.1.2:
# ip link add vxlan0 type vxlan id 10 remote 10.1.1.1 gbp
# iptables -I INPUT -m mark --mark 0x200 -j DROP
iproute2 [1] and OVS [2] support will be provided in separate patches.
[0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy
[1] https://github.com/tgraf/iproute2/tree/vxlan-gbp
[2] https://github.com/tgraf/ovs/tree/vxlan-gbp
Thomas Graf (6):
vxlan: Allow for VXLAN extensions to be implemented
vxlan: Group Policy extension
vxlan: Only bind to sockets with correct extensions enabled
openvswitch: Rename GENEVE_TUN_OPTS() to TUN_METADATA_OPTS()
openvswitch: Allow for any level of nesting in flow attributes
openvswitch: Support VXLAN Group Policy extension
drivers/net/vxlan.c | 225 ++++++++++++++++++++----------
include/net/ip_tunnels.h | 5 +-
include/net/vxlan.h | 98 +++++++++++++-
include/uapi/linux/if_link.h | 8 ++
include/uapi/linux/openvswitch.h | 11 ++
net/openvswitch/flow.c | 2 +-
net/openvswitch/flow.h | 14 +-
net/openvswitch/flow_netlink.c | 286 ++++++++++++++++++++++++++-------------
net/openvswitch/vport-geneve.c | 2 +-
net/openvswitch/vport-vxlan.c | 90 +++++++++++-
net/openvswitch/vport-vxlan.h | 11 ++
11 files changed, 569 insertions(+), 183 deletions(-)
create mode 100644 net/openvswitch/vport-vxlan.h
--
1.9.3
^ permalink raw reply
* [PATCH 1/6] vxlan: Allow for VXLAN extensions to be implemented
From: Thomas Graf @ 2015-01-12 12:26 UTC (permalink / raw)
To: davem, jesse, stephen, pshelar, therbert, alexei.starovoitov; +Cc: dev, netdev
In-Reply-To: <cover.1421064100.git.tgraf@suug.ch>
The VXLAN receive code is currently conservative in what it accepts and
will reject any frame that uses any of the reserved VXLAN protocol fields.
The VXLAN draft specifies that "reserved fields MUST be set to zero on
transmit and ignored on receive.".
Retain the current conservative parsing behaviour by default but allows
these fields to be used by VXLAN extensions which are explicitly enabled
on the VXLAN socket respectively VXLAN net_device.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
v2->v3:
- No change
v1->v2:
- No change
drivers/net/vxlan.c | 29 +++++++++++++++++++----------
include/net/vxlan.h | 32 +++++++++++++++++++++++++++++---
2 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 2ab0922..4d52aa9 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -65,7 +65,7 @@
#define VXLAN_VID_MASK (VXLAN_N_VID - 1)
#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr))
-#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */
+#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags default value. */
/* UDP port for VXLAN traffic.
* The IANA assigned port is 4789, but the Linux default is 8472
@@ -1100,22 +1100,28 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, VXLAN_HLEN))
goto error;
+ vs = rcu_dereference_sk_user_data(sk);
+ if (!vs)
+ goto drop;
+
/* Return packets with reserved bits set */
vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
- if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
- (vxh->vx_vni & htonl(0xff))) {
- netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
- ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
- goto error;
+
+ /* For backwards compatibility, only allow reserved fields to be
+ * used by VXLAN extensions if explicitly requested.
+ */
+ if (vs->exts) {
+ if (!vxh->vni_present)
+ goto error_invalid_header;
+ } else {
+ if (vxh->vx_flags != htonl(VXLAN_FLAGS) ||
+ (vxh->vx_vni & htonl(0xff)))
+ goto error_invalid_header;
}
if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
goto drop;
- vs = rcu_dereference_sk_user_data(sk);
- if (!vs)
- goto drop;
-
vs->rcv(vs, skb, vxh->vx_vni);
return 0;
@@ -1124,6 +1130,9 @@ drop:
kfree_skb(skb);
return 0;
+error_invalid_header:
+ netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+ ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
error:
/* Return non vxlan pkt */
return 1;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 903461a..3e98d31 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -11,10 +11,35 @@
#define VNI_HASH_BITS 10
#define VNI_HASH_SIZE (1<<VNI_HASH_BITS)
-/* VXLAN protocol header */
+/* VXLAN protocol header:
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |R|R|R|R|I|R|R|R| Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | VXLAN Network Identifier (VNI) | Reserved |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * I = 1 VXLAN Network Identifier (VNI) present
+ */
struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
+ union {
+ struct {
+#ifdef __LITTLE_ENDIAN_BITFIELD
+ __u8 reserved_flags1:3,
+ vni_present:1,
+ reserved_flags2:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 reserved_flags2:4,
+ vni_present:1,
+ reserved_flags1:3;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 vx_reserved1;
+ __be16 vx_reserved2;
+ };
+ __be32 vx_flags;
+ };
+ __be32 vx_vni;
};
struct vxlan_sock;
@@ -25,6 +50,7 @@ struct vxlan_sock {
struct hlist_node hlist;
vxlan_rcv_t *rcv;
void *data;
+ u32 exts;
struct work_struct del_work;
struct socket *sock;
struct rcu_head rcu;
--
1.9.3
^ permalink raw reply related
* [PATCH 3/6] vxlan: Only bind to sockets with correct extensions enabled
From: Thomas Graf @ 2015-01-12 12:26 UTC (permalink / raw)
To: davem, jesse, stephen, pshelar, therbert, alexei.starovoitov; +Cc: dev, netdev
In-Reply-To: <cover.1421064100.git.tgraf@suug.ch>
A VXLAN net_device looking for an appropriate socket may only consider
a socket which has a matching set of extensions enabled. If the
extensions don't match, return a conflict to have the caller create a
distinct socket with distinct port.
The OVS VXLAN port is kept unaware of extensions at this point.
Signed-off-by: Thomas Graf <tgraf@suug.ch>
---
v2->v3:
- No change
v1->v2:
- Improved commit message, reported by Jesse
drivers/net/vxlan.c | 35 +++++++++++++++++++++--------------
include/net/vxlan.h | 2 +-
net/openvswitch/vport-vxlan.c | 2 +-
3 files changed, 23 insertions(+), 16 deletions(-)
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index b148739..61e1112 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -271,14 +271,15 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
}
/* Find VXLAN socket based on network namespace, address family and UDP port */
-static struct vxlan_sock *vxlan_find_sock(struct net *net,
- sa_family_t family, __be16 port)
+static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
+ __be16 port, u32 exts)
{
struct vxlan_sock *vs;
hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) {
if (inet_sk(vs->sock->sk)->inet_sport == port &&
- inet_sk(vs->sock->sk)->sk.sk_family == family)
+ inet_sk(vs->sock->sk)->sk.sk_family == family &&
+ vs->exts == exts)
return vs;
}
return NULL;
@@ -298,11 +299,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
/* Look up VNI in a per net namespace table */
static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
- sa_family_t family, __be16 port)
+ sa_family_t family, __be16 port,
+ u32 exts)
{
struct vxlan_sock *vs;
- vs = vxlan_find_sock(net, family, port);
+ vs = vxlan_find_sock(net, family, port, exts);
if (!vs)
return NULL;
@@ -1776,7 +1778,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
ip_rt_put(rt);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port);
+ dst->sa.sa_family, dst_port,
+ vxlan->exts);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -1835,7 +1838,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_release(ndst);
dst_vxlan = vxlan_find_vni(vxlan->net, vni,
- dst->sa.sa_family, dst_port);
+ dst->sa.sa_family, dst_port,
+ vxlan->exts);
if (!dst_vxlan)
goto tx_error;
vxlan_encap_bypass(skb, vxlan, dst_vxlan);
@@ -2005,7 +2009,7 @@ static int vxlan_init(struct net_device *dev)
spin_lock(&vn->sock_lock);
vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET,
- vxlan->dst_port);
+ vxlan->dst_port, vxlan->exts);
if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) {
/* If we have a socket with same port already, reuse it */
vxlan_vs_add_dev(vs, vxlan);
@@ -2359,7 +2363,7 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6,
/* Create new listen socket if needed */
static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
- u32 flags)
+ u32 flags, u32 exts)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
@@ -2387,6 +2391,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
atomic_set(&vs->refcnt, 1);
vs->rcv = rcv;
vs->data = data;
+ vs->exts = exts;
/* Initialize the vxlan udp offloads structure */
vs->udp_offloads.port = port;
@@ -2411,13 +2416,14 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port,
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
- bool no_share, u32 flags)
+ bool no_share, u32 flags,
+ u32 exts)
{
struct vxlan_net *vn = net_generic(net, vxlan_net_id);
struct vxlan_sock *vs;
bool ipv6 = flags & VXLAN_F_IPV6;
- vs = vxlan_socket_create(net, port, rcv, data, flags);
+ vs = vxlan_socket_create(net, port, rcv, data, flags, exts);
if (!IS_ERR(vs))
return vs;
@@ -2425,7 +2431,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
return vs;
spin_lock(&vn->sock_lock);
- vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
+ vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, exts);
if (vs && ((vs->rcv != rcv) ||
!atomic_add_unless(&vs->refcnt, 1, 0)))
vs = ERR_PTR(-EBUSY);
@@ -2447,7 +2453,8 @@ static void vxlan_sock_work(struct work_struct *work)
__be16 port = vxlan->dst_port;
struct vxlan_sock *nvs;
- nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags);
+ nvs = vxlan_sock_add(net, port, vxlan_rcv, NULL, false, vxlan->flags,
+ vxlan->exts);
spin_lock(&vn->sock_lock);
if (!IS_ERR(nvs))
vxlan_vs_add_dev(nvs, vxlan);
@@ -2597,7 +2604,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev,
configure_vxlan_exts(vxlan, data[IFLA_VXLAN_EXTENSION]);
if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET,
- vxlan->dst_port)) {
+ vxlan->dst_port, vxlan->exts)) {
pr_info("duplicate VNI %u\n", vni);
return -EEXIST;
}
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 66ec53c..5ba49d5 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -130,7 +130,7 @@ struct vxlan_sock {
struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port,
vxlan_rcv_t *rcv, void *data,
- bool no_share, u32 flags);
+ bool no_share, u32 flags, u32 exts);
void vxlan_sock_release(struct vxlan_sock *vs);
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index dd68c97..266c595 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -128,7 +128,7 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms)
vxlan_port = vxlan_vport(vport);
strncpy(vxlan_port->name, parms->name, IFNAMSIZ);
- vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0);
+ vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, 0, 0);
if (IS_ERR(vs)) {
ovs_vport_free(vport);
return (void *)vs;
--
1.9.3
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox