Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next 3/5] ipv4: support sport, dport and ip protocol in RTM_GETROUTE
From: Roopa Prabhu @ 2018-04-16 20:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, dsa
In-Reply-To: <1523911298-8965-1-git-send-email-roopa@cumulusnetworks.com>

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This is a followup to fib rules sport, dport and ip proto
match support. Having them supported in getroute
makes it easier to test fib rule lookups. Used by fib rule
self tests.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/uapi/linux/rtnetlink.h |  3 ++
 net/ipv4/route.c               | 75 +++++++++++++++++++++++++++++++++---------
 2 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 9b15005..7947252 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -327,6 +327,9 @@ enum rtattr_type_t {
 	RTA_PAD,
 	RTA_UID,
 	RTA_TTL_PROPAGATE,
+	RTA_SPORT,
+	RTA_DPORT,
+	RTA_IP_PROTO,
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ccb25d8..ae55711 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2663,6 +2663,18 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 			from_kuid_munged(current_user_ns(), fl4->flowi4_uid)))
 		goto nla_put_failure;
 
+	if (fl4->fl4_sport &&
+	    nla_put_u16(skb, RTA_SPORT, ntohs(fl4->fl4_sport)))
+		goto nla_put_failure;
+
+	if (fl4->fl4_dport &&
+	    nla_put_u16(skb, RTA_DPORT, ntohs(fl4->fl4_dport)))
+		goto nla_put_failure;
+
+	if (fl4->flowi4_proto &&
+	    nla_put_u8(skb, RTA_IP_PROTO, fl4->flowi4_proto))
+		goto nla_put_failure;
+
 	error = rt->dst.error;
 
 	if (rt_is_input_route(rt)) {
@@ -2695,35 +2707,48 @@ static int rt_fill_info(struct net *net,  __be32 dst, __be32 src, u32 table_id,
 	return -EMSGSIZE;
 }
 
+static int nla_get_port(struct nlattr *attr, __be16 *port)
+{
+	int p = nla_get_u16(attr);
+
+	if (p <= 0 || p >= 0xffff)
+		return -EINVAL;
+
+	*port = htons(p);
+	return 0;
+}
+
 static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			     struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(in_skb->sk);
-	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
+	u32 table_id = RT_TABLE_MAIN;
+	__be16 sport = 0, dport = 0;
 	struct fib_result res = {};
 	struct rtable *rt = NULL;
+	struct sk_buff *skb;
+	struct rtmsg *rtm;
 	struct flowi4 fl4;
+	u8 ip_proto = 0;
 	__be32 dst = 0;
 	__be32 src = 0;
+	kuid_t uid;
 	u32 iif;
 	int err;
 	int mark;
-	struct sk_buff *skb;
-	u32 table_id = RT_TABLE_MAIN;
-	kuid_t uid;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy,
 			  extack);
 	if (err < 0)
-		goto errout;
+		return err;
 
 	rtm = nlmsg_data(nlh);
 
 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 	if (!skb) {
 		err = -ENOBUFS;
-		goto errout;
+		return err;
 	}
 
 	/* Reserve room for dummy headers, this skb can pass
@@ -2740,6 +2765,20 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		uid = make_kuid(current_user_ns(), nla_get_u32(tb[RTA_UID]));
 	else
 		uid = (iif ? INVALID_UID : current_uid());
+	if (tb[RTA_SPORT]) {
+		err = nla_get_port(tb[RTA_SPORT], &sport);
+		if (err)
+			goto errout_free;
+	}
+
+	if (tb[RTA_DPORT]) {
+		err = nla_get_port(tb[RTA_DPORT], &dport);
+		if (err)
+			goto errout_free;
+	}
+
+	if (tb[RTA_IP_PROTO])
+		ip_proto = nla_get_u8(tb[RTA_IP_PROTO]);
 
 	/* Bugfix: need to give ip_route_input enough of an IP header to
 	 * not gag.
@@ -2757,6 +2796,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
 	fl4.flowi4_mark = mark;
 	fl4.flowi4_uid = uid;
+	if (sport)
+		fl4.fl4_sport = sport;
+	if (dport)
+		fl4.fl4_dport = dport;
+	if (ip_proto)
+		fl4.flowi4_proto = ip_proto;
 
 	rcu_read_lock();
 
@@ -2766,7 +2811,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 		dev = dev_get_by_index_rcu(net, iif);
 		if (!dev) {
 			err = -ENODEV;
-			goto errout_free;
+			goto errout_rcu;
 		}
 
 		skb->protocol	= htons(ETH_P_IP);
@@ -2789,7 +2834,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	}
 
 	if (err)
-		goto errout_free;
+		goto errout_rcu;
 
 	if (rtm->rtm_flags & RTM_F_NOTIFY)
 		rt->rt_flags |= RTCF_NOTIFY;
@@ -2802,7 +2847,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			err = fib_props[res.type].error;
 			if (!err)
 				err = -EHOSTUNREACH;
-			goto errout_free;
+			goto errout_rcu;
 		}
 		err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
 				    nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
@@ -2813,18 +2858,18 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 				   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
 	}
 	if (err < 0)
-		goto errout_free;
+		goto errout_rcu;
 
 	rcu_read_unlock();
 
-	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
-errout:
-	return err;
+	return rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
 
 errout_free:
-	rcu_read_unlock();
 	kfree_skb(skb);
-	goto errout;
+	return err;
+errout_rcu:
+	rcu_read_unlock();
+	goto errout_free;
 }
 
 void ip_rt_multicast_event(struct in_device *in_dev)
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 2/5] net: fib_rules: add extack support
From: Roopa Prabhu @ 2018-04-16 20:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, dsa
In-Reply-To: <1523911298-8965-1-git-send-email-roopa@cumulusnetworks.com>

From: Roopa Prabhu <roopa@cumulusnetworks.com>

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 include/net/fib_rules.h |  3 ++-
 net/core/fib_rules.c    | 55 +++++++++++++++++++++++++++++++++++++------------
 net/decnet/dn_rules.c   |  7 +++++--
 net/ipv4/fib_rules.c    |  7 +++++--
 net/ipv4/ipmr.c         |  3 ++-
 net/ipv6/fib6_rules.c   |  7 +++++--
 net/ipv6/ip6mr.c        |  3 ++-
 7 files changed, 63 insertions(+), 22 deletions(-)

diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index e5cfcfc..b473df5 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -75,7 +75,8 @@ struct fib_rules_ops {
 	int			(*configure)(struct fib_rule *,
 					     struct sk_buff *,
 					     struct fib_rule_hdr *,
-					     struct nlattr **);
+					     struct nlattr **,
+					     struct netlink_ext_ack *);
 	int			(*delete)(struct fib_rule *);
 	int			(*compare)(struct fib_rule *,
 					   struct fib_rule_hdr *,
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 6780110..ebd9351 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -469,14 +469,18 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (frh->src_len)
 		if (!tb[FRA_SRC] ||
 		    frh->src_len > (ops->addr_size * 8) ||
-		    nla_len(tb[FRA_SRC]) != ops->addr_size)
+		    nla_len(tb[FRA_SRC]) != ops->addr_size) {
+			NL_SET_ERR_MSG(extack, "Invalid source address");
 			goto errout;
+	}
 
 	if (frh->dst_len)
 		if (!tb[FRA_DST] ||
 		    frh->dst_len > (ops->addr_size * 8) ||
-		    nla_len(tb[FRA_DST]) != ops->addr_size)
+		    nla_len(tb[FRA_DST]) != ops->addr_size) {
+			NL_SET_ERR_MSG(extack, "Invalid dst address");
 			goto errout;
+	}
 
 	nlrule = kzalloc(ops->rule_size, GFP_KERNEL);
 	if (!nlrule) {
@@ -537,6 +541,7 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		nlrule->l3mdev = nla_get_u8(tb[FRA_L3MDEV]);
 		if (nlrule->l3mdev != 1)
 #endif
+			NL_SET_ERR_MSG(extack, "Invalid l3mdev");
 			goto errout_free;
 	}
 
@@ -554,31 +559,41 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		nlrule->suppress_ifgroup = -1;
 
 	if (tb[FRA_GOTO]) {
-		if (nlrule->action != FR_ACT_GOTO)
+		if (nlrule->action != FR_ACT_GOTO) {
+			NL_SET_ERR_MSG(extack, "Unexpected goto");
 			goto errout_free;
+		}
 
 		nlrule->target = nla_get_u32(tb[FRA_GOTO]);
 		/* Backward jumps are prohibited to avoid endless loops */
-		if (nlrule->target <= nlrule->pref)
+		if (nlrule->target <= nlrule->pref) {
+			NL_SET_ERR_MSG(extack, "Backward goto not supported");
 			goto errout_free;
+		}
 	} else if (nlrule->action == FR_ACT_GOTO) {
+		NL_SET_ERR_MSG(extack, "Missing goto target for action goto");
 		goto errout_free;
 	}
 
-	if (nlrule->l3mdev && nlrule->table)
+	if (nlrule->l3mdev && nlrule->table) {
+		NL_SET_ERR_MSG(extack, "l3mdev and table are mutually exclusive");
 		goto errout_free;
+	}
 
 	if (tb[FRA_UID_RANGE]) {
 		if (current_user_ns() != net->user_ns) {
 			err = -EPERM;
+			NL_SET_ERR_MSG(extack, "No permission to set uid");
 			goto errout_free;
 		}
 
 		nlrule->uid_range = nla_get_kuid_range(tb);
 
 		if (!uid_range_set(&nlrule->uid_range) ||
-		    !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end))
+		    !uid_lte(nlrule->uid_range.start, nlrule->uid_range.end)) {
+			NL_SET_ERR_MSG(extack, "Invalid uid range");
 			goto errout_free;
+		}
 	} else {
 		nlrule->uid_range = fib_kuid_range_unset;
 	}
@@ -589,15 +604,19 @@ static int fib_nl2rule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tb[FRA_SPORT_RANGE]) {
 		err = nla_get_port_range(tb[FRA_SPORT_RANGE],
 					 &nlrule->sport_range);
-		if (err)
+		if (err) {
+			NL_SET_ERR_MSG(extack, "Invalid sport range");
 			goto errout_free;
+		}
 	}
 
 	if (tb[FRA_DPORT_RANGE]) {
 		err = nla_get_port_range(tb[FRA_DPORT_RANGE],
 					 &nlrule->dport_range);
-		if (err)
+		if (err) {
+			NL_SET_ERR_MSG(extack, "Invalid dport range");
 			goto errout_free;
+		}
 	}
 
 	*rule = nlrule;
@@ -621,18 +640,23 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -EINVAL, unresolved = 0;
 	bool user_priority = false;
 
-	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+		NL_SET_ERR_MSG(extack, "Invalid msg length");
 		goto errout;
+	}
 
 	ops = lookup_rules_ops(net, frh->family);
 	if (!ops) {
 		err = -EAFNOSUPPORT;
+		NL_SET_ERR_MSG(extack, "Rule family not supported");
 		goto errout;
 	}
 
 	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
-	if (err < 0)
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Error parsing msg");
 		goto errout;
+	}
 
 	err = fib_nl2rule(skb, nlh, extack, ops, tb, &rule, &user_priority);
 	if (err)
@@ -644,7 +668,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 		goto errout_free;
 	}
 
-	err = ops->configure(rule, skb, frh, tb);
+	err = ops->configure(rule, skb, frh, tb, extack);
 	if (err < 0)
 		goto errout_free;
 
@@ -723,18 +747,23 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh,
 	int err = -EINVAL;
 	bool user_priority = false;
 
-	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh)))
+	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) {
+		NL_SET_ERR_MSG(extack, "Invalid msg length");
 		goto errout;
+	}
 
 	ops = lookup_rules_ops(net, frh->family);
 	if (ops == NULL) {
 		err = -EAFNOSUPPORT;
+		NL_SET_ERR_MSG(extack, "Rule family not supported");
 		goto errout;
 	}
 
 	err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy, extack);
-	if (err < 0)
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Error parsing msg");
 		goto errout;
+	}
 
 	err = fib_nl2rule(skb, nlh, extack, ops, tb, &nlrule, &user_priority);
 	if (err)
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index c795c3f..7223669 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -121,13 +121,16 @@ static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
 
 static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 				 struct fib_rule_hdr *frh,
-				 struct nlattr **tb)
+				 struct nlattr **tb,
+				 struct netlink_ext_ack *extack)
 {
 	int err = -EINVAL;
 	struct dn_fib_rule *r = (struct dn_fib_rule *)rule;
 
-	if (frh->tos)
+	if (frh->tos) {
+		NL_SET_ERR_MSG(extack, "Invalid tos value");
 		goto  errout;
+	}
 
 	if (rule->table == RT_TABLE_UNSPEC) {
 		if (rule->action == FR_ACT_TO_TBL) {
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 737d11b..f8eb78d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -213,14 +213,17 @@ static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = {
 
 static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			       struct fib_rule_hdr *frh,
-			       struct nlattr **tb)
+			       struct nlattr **tb,
+			       struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
 	int err = -EINVAL;
 	struct fib4_rule *rule4 = (struct fib4_rule *) rule;
 
-	if (frh->tos & ~IPTOS_TOS_MASK)
+	if (frh->tos & ~IPTOS_TOS_MASK) {
+		NL_SET_ERR_MSG(extack, "Invalid tos");
 		goto errout;
+	}
 
 	/* split local/main if they are not already split */
 	err = fib_unmerge(net);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 2fb4de3..38e092e 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -201,7 +201,8 @@ static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
 };
 
 static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
-			       struct fib_rule_hdr *frh, struct nlattr **tb)
+			       struct fib_rule_hdr *frh, struct nlattr **tb,
+			       struct netlink_ext_ack *extack)
 {
 	return 0;
 }
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index df113c7..6547fc6 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -245,15 +245,18 @@ static const struct nla_policy fib6_rule_policy[FRA_MAX+1] = {
 
 static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
 			       struct fib_rule_hdr *frh,
-			       struct nlattr **tb)
+			       struct nlattr **tb,
+			       struct netlink_ext_ack *extack)
 {
 	int err = -EINVAL;
 	struct net *net = sock_net(skb->sk);
 	struct fib6_rule *rule6 = (struct fib6_rule *) rule;
 
 	if (rule->action == FR_ACT_TO_TBL && !rule->l3mdev) {
-		if (rule->table == RT6_TABLE_UNSPEC)
+		if (rule->table == RT6_TABLE_UNSPEC) {
+			NL_SET_ERR_MSG(extack, "Invalid table");
 			goto errout;
+		}
 
 		if (fib6_new_table(net, rule->table) == NULL) {
 			err = -ENOBUFS;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 298fd8b..20a419e 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -180,7 +180,8 @@ static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
 };
 
 static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
-				struct fib_rule_hdr *frh, struct nlattr **tb)
+				struct fib_rule_hdr *frh, struct nlattr **tb,
+				struct netlink_ext_ack *extack)
 {
 	return 0;
 }
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 4/5] ipv6: support sport, dport and ip protocol in RTM_GETROUTE
From: Roopa Prabhu @ 2018-04-16 20:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, dsa
In-Reply-To: <1523911298-8965-1-git-send-email-roopa@cumulusnetworks.com>

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This is a followup to fib6 rules sport, dport and ip proto
match support. Having them supported in getroute
makes it easier to test fib6 rule lookups. Used by fib6 rule
self tests.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 net/ipv6/route.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 49b954d..5086a80 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3986,6 +3986,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_EXPIRES]		= { .type = NLA_U32 },
 	[RTA_UID]		= { .type = NLA_U32 },
 	[RTA_MARK]		= { .type = NLA_U32 },
+	[RTA_IP_PROTO]		= { .type = NLA_U8 },
+	[RTA_SPORT]		= { .type = NLA_U16 },
+	[RTA_DPORT]		= { .type = NLA_U16 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -4658,6 +4661,17 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 		     NLM_F_MULTI);
 }
 
+static int nla_get_port(struct nlattr *attr, __be16 *port)
+{
+	int p = nla_get_u16(attr);
+
+	if (p <= 0 || p >= 0xffff)
+		return -EINVAL;
+
+	*port = htons(p);
+	return 0;
+}
+
 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 			      struct netlink_ext_ack *extack)
 {
@@ -4711,6 +4725,21 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 	else
 		fl6.flowi6_uid = iif ? INVALID_UID : current_uid();
 
+	if (tb[RTA_SPORT]) {
+		err = nla_get_port(tb[RTA_SPORT], &fl6.fl6_sport);
+		if (err)
+			goto errout;
+	}
+
+	if (tb[RTA_DPORT]) {
+		err = nla_get_port(tb[RTA_DPORT], &fl6.fl6_dport);
+		if (err)
+			goto errout;
+	}
+
+	if (tb[RTA_IP_PROTO])
+		fl6.flowi6_proto = nla_get_u8(tb[RTA_IP_PROTO]);
+
 	if (iif) {
 		struct net_device *dev;
 		int flags = 0;
-- 
2.1.4

^ permalink raw reply related

* [PATCH net-next 5/5] selftests: net: initial fib rule tests
From: Roopa Prabhu @ 2018-04-16 20:41 UTC (permalink / raw)
  To: davem; +Cc: netdev, dsa
In-Reply-To: <1523911298-8965-1-git-send-email-roopa@cumulusnetworks.com>

From: Roopa Prabhu <roopa@cumulusnetworks.com>

This adds a first set of tests for fib rule match/action for
ipv4 and ipv6. Initial tests only cover action table.
can be extended to cover other actions in the future.
Uses ip route get to validate the rule lookup.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
 tools/testing/selftests/net/Makefile          |   2 +-
 tools/testing/selftests/net/fib_rule_tests.sh | 208 ++++++++++++++++++++++++++
 2 files changed, 209 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/net/fib_rule_tests.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 785fc18..f02ab70 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,7 +5,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh fib_rule_tests.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 0000000..b28fbc1
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+RTABLE=100
+GW_IP4=192.51.100.2
+SRC_IP=192.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=192.51.100.1
+DEV=dummy0
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "        %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		printf "        %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+		echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+setup()
+{
+	set -e
+	ip netns add testns
+	ip -netns testns link set dev lo up
+
+	ip -netns testns link add dummy0 type dummy
+	ip -netns testns link set dev dummy0 up
+	ip -netns testns address add 198.51.100.1/24 dev dummy0
+	ip -netns testns -6 address add 2001:db8:1::1/64 dev dummy0
+
+	set +e
+}
+
+cleanup()
+{
+	ip -netns testns link del dev dummy0 &> /dev/null
+	ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+    ip rule help 2>&1 | grep -q $1
+    if [ $? -ne 0 ]; then
+	    echo "SKIP: iproute2 get too old, missing $1 match"
+        return 1
+    fi
+
+    ip route get help 2>&1 | grep -q $2
+    if [ $? -ne 0 ]; then
+	    echo "SKIP: iproute2 get too old, missing $2 match"
+        return 1
+    fi
+
+    return 0
+}
+
+fib_rule6_del()
+{
+    ip -netns testns -6 rule del $1
+	log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{
+	pref=$(ip -netns testns -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	ip -netns testns -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+    match="$1"
+    getmatch="$2"
+
+	ip -netns testns -6 rule add $match table $RTABLE
+	ip -netns testns -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule6 check: $1"
+
+	fib_rule6_del_by_pref "$match"
+	log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+	# setup the pbr redirect route
+	ip -netns testns -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+	# test oif match
+	match="oif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP6 iif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+    match="tos 0x10"
+    fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+    match="fwmark 0x64"
+    getmatch="mark 0x64"
+    fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+    fib_check_iproute_support "uidrange" "uid"
+    if [ $? -eq 0 ]; then
+        match="uidrange 100-100"
+        getmatch="uid 100"
+        fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+    fi
+
+    fib_check_iproute_support "sport" "sport"
+    if [ $? -eq 0 ]; then
+        match="sport 666 dport 777"
+        fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+    fi
+}
+
+fib_rule4_del()
+{
+    ip -netns testns rule del $1
+	log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{
+	pref=$(ip -netns testns rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+	ip -netns testns rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+	match="$1"
+    getmatch="$2"
+
+	ip -netns testns rule add $match table $RTABLE
+	ip -netns testns route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule4 check: $1"
+
+	fib_rule4_del_by_pref "$match"
+	log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+	# setup the pbr redirect route
+	ip -netns testns route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+	# test oif match
+	match="oif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP iif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+    match="tos 0x10"
+    fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+    match="fwmark 0x64"
+    getmatch="mark 0x64"
+    fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+    fib_check_iproute_support "uidrange" "uid"
+    if [ $? -eq 0 ]; then
+        match="uidrange 100-100"
+        getmatch="uid 100"
+        fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+    fi
+
+    fib_check_iproute_support "sport" "sport"
+    if [ $? -eq 0 ]; then
+        match="sport 666 dport 777"
+        fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+    fi
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 0
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+fib_rule4_test
+fib_rule6_test
+cleanup
+
+exit $ret
-- 
2.1.4

^ permalink raw reply related

* Re: tcp hang when socket fills up ?
From: Marcelo Ricardo Leitner @ 2018-04-16 20:43 UTC (permalink / raw)
  To: Dominique Martinet; +Cc: Eric Dumazet, Michal Kubecek, netdev, Florian Westphal
In-Reply-To: <20180414015515.GA24798@nautica>

On Sat, Apr 14, 2018 at 03:55:15AM +0200, Dominique Martinet wrote:
> 16:49:26.724457 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 25134, win 646, options [nop,nop,TS val 1617129467 ecr 1313937599], length 0
> 16:49:26.726081 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 26508, win 669, options [nop,nop,TS val 1617129471 ecr 1313937599], length 0
> 16:49:26.726391 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 27882, win 691, options [nop,nop,TS val 1617129471 ecr 1313937601], length 0
> 16:49:26.726962 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 29256, win 714, options [nop,nop,TS val 1617129472 ecr 1313937601], length 0
> 16:49:26.727614 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 30630, win 737, options [nop,nop,TS val 1617129473 ecr 1313937602], length 0
> 16:49:26.728084 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 32004, win 759, options [nop,nop,TS val 1617129473 ecr 1313937602], length 0

[A] (packet above)

> 16:49:26.728507 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 59484:60858, ack 4190, win 307, options [nop,nop,TS val 1313937635 ecr 1617129473], length 1374
> 16:49:26.728511 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 60858:62232, ack 4190, win 307, options [nop,nop,TS val 1313937635 ecr 1617129473], length 1374
> 16:49:26.729531 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 33378, win 782, options [nop,nop,TS val 1617129475 ecr 1313937607], length 0
> 16:49:26.730002 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 34752, win 805, options [nop,nop,TS val 1617129475 ecr 1313937607], length 0
> 16:49:26.730340 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 62232:63606, ack 4190, win 307, options [nop,nop,TS val 1313937636 ecr 1617129473], length 1374
> 16:49:26.730344 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 63606:64980, ack 4190, win 307, options [nop,nop,TS val 1313937636 ecr 1617129473], length 1374
> 16:49:26.731398 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 64980:66354, ack 4190, win 307, options [nop,nop,TS val 1313937638 ecr 1617129473], length 1374
> 16:49:26.731402 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 66354:67728, ack 4190, win 307, options [nop,nop,TS val 1313937638 ecr 1617129473], length 1374
> 16:49:26.731634 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 36126, win 827, options [nop,nop,TS val 1617129476 ecr 1313937607], length 0
> 16:49:26.732955 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 67728:69102, ack 4190, win 307, options [nop,nop,TS val 1313937639 ecr 1617129473], length 1374
> 16:49:26.732963 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 69102:70476, ack 4190, win 307, options [nop,nop,TS val 1313937639 ecr 1617129473], length 1374
> 16:49:26.733956 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 37500, win 850, options [nop,nop,TS val 1617129476 ecr 1313937607], length 0
> 16:49:26.734242 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 38874, win 872, options [nop,nop,TS val 1617129477 ecr 1313937608], length 0
> 16:49:26.734653 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 40248, win 895, options [nop,nop,TS val 1617129478 ecr 1313937608], length 0
> 16:49:26.735042 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 70476:71850, ack 4190, win 307, options [nop,nop,TS val 1313937641 ecr 1617129473], length 1374
> 16:49:26.735046 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 71850:73224, ack 4190, win 307, options [nop,nop,TS val 1313937641 ecr 1617129473], length 1374
> 16:49:26.735334 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 41622, win 918, options [nop,nop,TS val 1617129478 ecr 1313937609], length 0
> 16:49:26.736005 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 42996, win 940, options [nop,nop,TS val 1617129478 ecr 1313937609], length 0
> 16:49:26.736402 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 73224:74598, ack 4190, win 307, options [nop,nop,TS val 1313937643 ecr 1617129473], length 1374
> 16:49:26.736408 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 74598:75972, ack 4190, win 307, options [nop,nop,TS val 1313937643 ecr 1617129473], length 1374
> 16:49:26.738561 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 44370, win 963, options [nop,nop,TS val 1617129482 ecr 1313937616], length 0
> 16:49:26.739539 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 45744, win 986, options [nop,nop,TS val 1617129482 ecr 1313937616], length 0
> 16:49:26.739882 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 47118, win 1008, options [nop,nop,TS val 1617129484 ecr 1313937617], length 0
> 16:49:26.740255 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 48492, win 1031, options [nop,nop,TS val 1617129484 ecr 1313937617], length 0
> 16:49:26.746756 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 49866, win 1053, options [nop,nop,TS val 1617129493 ecr 1313937627], length 0
> 16:49:26.747923 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 51240, win 1076, options [nop,nop,TS val 1617129494 ecr 1313937627], length 0
> 16:49:26.749083 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 52614, win 1099, options [nop,nop,TS val 1617129495 ecr 1313937629], length 0
> 16:49:26.750171 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 53988, win 1121, options [nop,nop,TS val 1617129496 ecr 1313937629], length 0
> 16:49:26.750808 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 55362, win 1144, options [nop,nop,TS val 1617129497 ecr 1313937629], length 0
> 16:49:26.754648 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 56736, win 1167, options [nop,nop,TS val 1617129500 ecr 1313937629], length 0
> 16:49:26.755985 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 58110, win 1189, options [nop,nop,TS val 1617129501 ecr 1313937630], length 0
> 16:49:26.758513 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 59484, win 1212, options [nop,nop,TS val 1617129502 ecr 1313937630], length 0
> 16:49:26.759096 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 60858, win 1234, options [nop,nop,TS val 1617129503 ecr 1313937635], length 0
> 16:49:26.759421 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 62232, win 1257, options [nop,nop,TS val 1617129503 ecr 1313937635], length 0
> 16:49:26.759755 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 63606, win 1280, options [nop,nop,TS val 1617129504 ecr 1313937636], length 0
> 16:49:26.760653 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 64980, win 1302, options [nop,nop,TS val 1617129505 ecr 1313937636], length 0
> 16:49:26.761453 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 66354, win 1325, options [nop,nop,TS val 1617129506 ecr 1313937638], length 0
> 16:49:26.762199 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 67728, win 1348, options [nop,nop,TS val 1617129507 ecr 1313937638], length 0
> 16:49:26.763547 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 67728, win 1348, options [nop,nop,TS val 1617129507 ecr 1313937638], length 36
> 16:49:26.763553 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 70476, win 1393, options [nop,nop,TS val 1617129508 ecr 1313937639], length 0
> 16:49:26.764298 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 73224, win 1438, options [nop,nop,TS val 1617129509 ecr 1313937641], length 0
> 16:49:26.764676 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 75972, win 1444, options [nop,nop,TS val 1617129510 ecr 1313937643], length 0
> 16:49:26.807754 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 75972:77346, ack 4190, win 307, options [nop,nop,TS val 1313937714 ecr 1617129473], length 1374
> 16:49:26.876467 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 77346, win 1444, options [nop,nop,TS val 1617129620 ecr 1313937714], length 0
> 16:49:27.048760 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 32004:33378, ack 4190, win 307, options [nop,nop,TS val 1313937955 ecr 1617129473], length 1374

Probably bogus retrans for conntrack, but OUTPUT is not filtered.

> 16:49:27.051791 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 77346, win 1444, options [nop,nop,TS val 1617129762 ecr 1313937714], length 36
> 16:49:27.076444 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 77346, win 1444, options [nop,nop,TS val 1617129822 ecr 1313937714,nop,nop,sack 1 {32004:33378}], length 0

I guess the issue lies with the dup-sack here.

AFAIR Disabling timestamps also disables SACK, which would avoid the
issue as conntrack will not see these as they won't happen anymore.

Then it gets locked into a loopy behavior:

> 16:49:27.371182 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 77346, win 1444, options [nop,nop,TS val 1617130018 ecr 1313937714], length 36

> 16:49:27.519862 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 32004:33378, ack 4190, win 307, options [nop,nop,TS val 1313938426 ecr 1617129473], length 1374
> 16:49:27.547662 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 77346, win 1444, options [nop,nop,TS val 1617130293 ecr 1313937714,nop,nop,sack 1 {32004:33378}], length 0
> 16:49:27.883372 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 77346, win 1444, options [nop,nop,TS val 1617130530 ecr 1313937714], length 36

Cycle 1

> 16:49:28.511861 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 32004:33378, ack 4190, win 307, options [nop,nop,TS val 1313939418 ecr 1617129473], length 1374
> 16:49:28.538891 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 77346, win 1444, options [nop,nop,TS val 1617131285 ecr 1313937714,nop,nop,sack 1 {32004:33378}], length 0
> 16:49:28.907197 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 77346, win 1444, options [nop,nop,TS val 1617131554 ecr 1313937714], length 36

Cycle 2

> 16:49:30.431864 IP <server local ip>.13317 > <client public ip>.31872: Flags [.], seq 32004:33378, ack 4190, win 307, options [nop,nop,TS val 1313941338 ecr 1617129473], length 1374
> 16:49:30.459127 IP <client public ip>.31872 > <server local ip>.13317: Flags [.], ack 77346, win 1444, options [nop,nop,TS val 1617133204 ecr 1313937714,nop,nop,sack 1 {32004:33378}], length 0
> 16:49:30.955388 IP <client public ip>.31872 > <server local ip>.13317: Flags [P.], seq 4190:4226, ack 77346, win 1444, options [nop,nop,TS val 1617133602 ecr 1313937714], length 36

Cycle 3...

Based on server RTO.

But I can't tell why packet [A] and subsequent packets from client
never made to the server's socket.

^ permalink raw reply

* Re: [RFC net-next 1/2] net: net-porcfs: Reduce rcu lock critical section
From: Saeed Mahameed @ 2018-04-16 20:50 UTC (permalink / raw)
  To: eric.dumazet@gmail.com, davem@davemloft.net; +Cc: netdev@vger.kernel.org
In-Reply-To: <1523560344.3587.40.camel@mellanox.com>

On Thu, 2018-04-12 at 19:12 +0000, Saeed Mahameed wrote:
> On Wed, 2018-04-11 at 19:59 -0700, Eric Dumazet wrote:
> > 
> > On 04/11/2018 04:47 PM, Saeed Mahameed wrote:
> > > 
> > > Well if we allow devices to access HW counters via FW command
> > > interfaces in ndo_get_stats and by testing mlx5 where we query up
> > > to 5
> > > hw registers, it could take 100us, still this is way smaller than
> > > 10sec
> > >  :) and it is really a nice rate to fetch HW stats on demand.
> > 
> > If hardware stats are slower than software ones, maybe it is time
> > to
> > use software stats,
> > instead of changing the whole stack ?
> > 
> 
> We already have SW stats for [rx/tx]_[packets/bytes] but for
> [rx/tx]_[error/drop] etc .. they can only be grabbed from HW.
> 
> We don't want to report only partial counters to get_stats ndo just
> to
> avoid sleeping.
> 
> > There are very few devices drivers having issues like that.
> > 

Dave, Eric, I would like to know whether it is ok to have this change
in the kernel (make get_stats ndo "might sleep")? I really didn't get
any convincing feedback to not do this change.

For smart nics where a lot of statistics computations and
driver/Hardware communication go through Firmware, there is not much we
can do in the driver in order to provide accurate and "fresh"
statistics other than talking directly to Firmware through command
interfaces that "might sleep". Currently this is done through a
background thread caching hardware statistics 5 times a second which is
wasteful.

In order to complete this work I need to take care of netlink, ovs and
bonding (similar to what i did in this patch).

Plus I might need to introduce a new dev hold/put API that guarantees
the netdev is not uninitialized while it is being briefly held, this is
needed since the current device chain locks guarantees that by design.
(Should be a relatively small change).

Go/No Go?

Thanks,
Saeed.



^ permalink raw reply

* [PATCH net-next] selftest: tc_flower: add testcase for 'ip_flags'
From: Davide Caratti @ 2018-04-16 20:59 UTC (permalink / raw)
  To: Jiri Pirko, David Ahern, David S. Miller; +Cc: netdev, Pieter Jansen van Vuuren

Signed-off-by: Davide Caratti <dcaratti@redhat.com>
---
 .../testing/selftests/net/forwarding/tc_flower.sh  | 70 ++++++++++++++++++++++
 1 file changed, 70 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..0c54059f1875 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -149,6 +149,74 @@ match_src_ip_test()
 	log_test "src_ip match ($tcflags)"
 }
 
+match_ip_flags_test()
+{
+	RET=0
+
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag) "
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match fistfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Match on wrong nofrag filter (1stfrag)"
+
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -181,6 +249,7 @@ match_dst_mac_test
 match_src_mac_test
 match_dst_ip_test
 match_src_ip_test
+match_ip_flags_test
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
@@ -191,6 +260,7 @@ else
 	match_src_mac_test
 	match_dst_ip_test
 	match_src_ip_test
+	match_ip_flags_test
 fi
 
 exit $EXIT_STATUS
-- 
2.14.3

^ permalink raw reply related

* Re: [RFC net-next 1/2] net: net-porcfs: Reduce rcu lock critical section
From: Eric Dumazet @ 2018-04-16 21:07 UTC (permalink / raw)
  To: Saeed Mahameed, eric.dumazet@gmail.com, davem@davemloft.net
  Cc: netdev@vger.kernel.org
In-Reply-To: <1523911853.4579.19.camel@mellanox.com>



On 04/16/2018 01:50 PM, Saeed Mahameed wrote:
> 
> Dave, Eric, I would like to know whether it is ok to have this change
> in the kernel (make get_stats ndo "might sleep")? I really didn't get
> any convincing feedback to not do this change.

Fact that you were not convinced does not mean you were right.

I would rather not change the current behavior and risk nasty bugs.

Having SNMP daemons blocked while some device is sleeping in its ndo_get_stats()
is concerning, since it is currently an immediate operation.

ethtool -S can definitely sleep if you need fancy stats.

^ permalink raw reply

* Re: XDP performance regression due to CONFIG_RETPOLINE Spectre V2
From: Jesper Dangaard Brouer @ 2018-04-16 21:07 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: xdp-newbies@vger.kernel.org, netdev@vger.kernel.org,
	Christoph Hellwig, David Woodhouse, William Tu,
	Björn Töpel, Karlsson, Magnus, Alexander Duyck,
	Arnaldo Carvalho de Melo, brouer
In-Reply-To: <20180416122706.GA20624@infradead.org>

On Mon, 16 Apr 2018 05:27:06 -0700
Christoph Hellwig <hch@infradead.org> wrote:

> Can you try the following hack which avoids indirect calls entirely
> for the fast path direct mapping case?
> 
> ---
> From b256a008c1b305e6a1c2afe7c004c54ad2e96d4b Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig <hch@lst.de>
> Date: Mon, 16 Apr 2018 14:18:14 +0200
> Subject: dma-mapping: bypass dma_ops for direct mappings
> 
> Reportedly the retpoline mitigation for spectre causes huge penalties
> for indirect function calls.  This hack bypasses the dma_ops mechanism
> for simple direct mappings.

I did below to get it compiling, and working...

On X86 swiotlb fallback (via get_dma_ops -> get_arch_dma_ops) to use
x86_swiotlb_dma_ops, instead of swiotlb_dma_ops.  I also included that
in below fix patch.

Performance improved to 8.9 Mpps from approx 6.5Mpps.

(This was without my bulking for net_device->ndo_xdp_xmit, so that
number should improve more).

---
[PATCH RFC] fixups for Hellwig's DMA avoid retpoline overhead patch

From: Jesper Dangaard Brouer <brouer@redhat.com>

Performance improved to 8.9 Mpps
    8917613 pkt/s

it was around 6.5 Mpps before.
---
 arch/x86/kernel/pci-swiotlb.c                 |    3 ++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |    1 +
 include/linux/dma-mapping.h                   |   14 +++++++++++++-
 lib/Kconfig                                   |    2 +-
 lib/Makefile                                  |    1 +
 lib/dma-direct.c                              |    2 ++
 lib/swiotlb.c                                 |    1 +
 7 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c
index 0ee0f8f34251..46207e288587 100644
--- a/arch/x86/kernel/pci-swiotlb.c
+++ b/arch/x86/kernel/pci-swiotlb.c
@@ -48,7 +48,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size,
 		dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs);
 }
 
-static const struct dma_map_ops x86_swiotlb_dma_ops = {
+const struct dma_map_ops x86_swiotlb_dma_ops = {
 	.mapping_error = swiotlb_dma_mapping_error,
 	.alloc = x86_swiotlb_alloc_coherent,
 	.free = x86_swiotlb_free_coherent,
@@ -62,6 +62,7 @@ static const struct dma_map_ops x86_swiotlb_dma_ops = {
 	.unmap_page = swiotlb_unmap_page,
 	.dma_supported = NULL,
 };
+EXPORT_SYMBOL(x86_swiotlb_dma_ops);
 
 /*
  * pci_swiotlb_detect_override - set swiotlb to 1 if necessary
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0daccaf72a30..6d2e3f75febc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -10297,6 +10297,7 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return err;
 
 	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		pr_info("XXX %s() dma_set_mask_and_coherent\n", __func__);
 		pci_using_dac = 1;
 	} else {
 		err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h
index f2fb5aec7626..7fa92664ebfd 100644
--- a/include/linux/dma-mapping.h
+++ b/include/linux/dma-mapping.h
@@ -622,6 +622,7 @@ static inline int dma_supported(struct device *dev, u64 mask)
 }
 
 extern const struct dma_map_ops swiotlb_dma_ops;
+extern const struct dma_map_ops x86_swiotlb_dma_ops;
 
 #ifndef HAVE_ARCH_DMA_SET_MASK
 static inline int dma_set_mask(struct device *dev, u64 mask)
@@ -632,12 +633,23 @@ static inline int dma_set_mask(struct device *dev, u64 mask)
 	dma_check_mask(dev, mask);
 
 	*dev->dma_mask = mask;
+#ifdef CONFIG_DMA_DIRECT_OPS
 	if (dev->dma_ops == &dma_direct_ops ||
+# ifdef CONFIG_SWIOTLB
 	    (dev->dma_ops == &swiotlb_dma_ops &&
-	     mask == DMA_BIT_MASK(64)))
+	     mask == DMA_BIT_MASK(64)) ||
+#  ifdef CONFIG_X86
+	    (get_dma_ops(dev) == &x86_swiotlb_dma_ops &&
+	     mask == DMA_BIT_MASK(64))
+#  endif /* CONFIG_X86 */
+# endif /* CONFIG_SWIOTLB */
+	   )
 		dev->is_dma_direct = true;
 	else
+#endif /* CONFIG_DMA_DIRECT_OPS */
 		dev->is_dma_direct = false;
+
+	pr_info("XXX: %s() DMA is direct: %d\n", __func__, dev->is_dma_direct);
 	return 0;
 }
 #endif
diff --git a/lib/Kconfig b/lib/Kconfig
index e96089499371..6eba2bcf468a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -416,7 +416,7 @@ config SGL_ALLOC
 config DMA_DIRECT_OPS
 	bool
 	depends on HAS_DMA && (!64BIT || ARCH_DMA_ADDR_T_64BIT)
-	default n
+	default y
 
 config DMA_VIRT_OPS
 	bool
diff --git a/lib/Makefile b/lib/Makefile
index a90d4fcd748f..df4885eabf9c 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -29,6 +29,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
 lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
+#lib-y += dma-direct.o
 lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y	+= kobject.o klist.o
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index ea69f8777e7f..d945efea3dae 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -107,6 +107,7 @@ dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
 		return DIRECT_MAPPING_ERROR;
 	return dma_addr;
 }
+EXPORT_SYMBOL(dma_direct_map_page);
 
 int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 		int nents, enum dma_data_direction dir, unsigned long attrs)
@@ -125,6 +126,7 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl,
 
 	return nents;
 }
+EXPORT_SYMBOL(dma_direct_map_sg);
 
 int dma_direct_supported(struct device *dev, u64 mask)
 {
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..ecb70f5e95ba 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -1132,4 +1132,5 @@ const struct dma_map_ops swiotlb_dma_ops = {
 	.unmap_page		= swiotlb_unmap_page,
 	.dma_supported		= swiotlb_dma_supported,
 };
+EXPORT_SYMBOL(swiotlb_dma_ops);
 #endif /* CONFIG_DMA_DIRECT_OPS */

^ permalink raw reply related

* Re: [PATCH net] net: Fix one possible memleak in ip_setup_cork
From: Eric Dumazet @ 2018-04-16 21:18 UTC (permalink / raw)
  To: David Miller, gfree.wind; +Cc: kuznet, netdev
In-Reply-To: <20180416.125814.883736880189372257.davem@davemloft.net>



On 04/16/2018 09:58 AM, David Miller wrote:
> From: gfree.wind@vip.163.com
> Date: Mon, 16 Apr 2018 10:16:45 +0800
> 
>> From: Gao Feng <gfree.wind@vip.163.com>
>>
>> It would allocate memory in this function when the cork->opt is NULL. But
>> the memory isn't freed if failed in the latter rt check, and return error
>> directly. It causes the memleak if its caller is ip_make_skb which also
>> doesn't free the cork->opt when meet a error.
>>
>> Now move the rt check ahead to avoid the memleak.
>>
>> Signed-off-by: Gao Feng <gfree.wind@vip.163.com>
> 
> Looks good, applied and queued up for -stable.
> 
> I guess in the other code paths, ip_flush_pending_frames() or similar
> would clean up the in-sock cork information.
> 

I am not sure ip_make_skb() can be called with a NULL rt.

Patch makes no harm, but does not seem to fix a bug.

^ permalink raw reply

* Proposal
From: MS Zeliha Omer Faruk @ 2018-04-16 21:19 UTC (permalink / raw)





Hello

Greeetings to you please did you get my previous email regarding my
investment proposal last week friday ?

MS.Zeliha ömer faruk
zeliha.omer.faruk@gmail.com

^ permalink raw reply

* Re: [PATCH v2] KEYS: DNS: limit the length of option strings
From: Eric Biggers @ 2018-04-16 21:31 UTC (permalink / raw)
  To: David Howells; +Cc: keyrings, netdev, Mark Rutland, Eric Biggers
In-Reply-To: <20180402192035.GC59810@gmail.com>

On Mon, Apr 02, 2018 at 12:20:35PM -0700, Eric Biggers wrote:
> On Fri, Mar 23, 2018 at 01:21:22PM -0700, Eric Biggers wrote:
> > On Mon, Mar 12, 2018 at 10:57:07AM -0700, Eric Biggers wrote:
> > > On Wed, Mar 07, 2018 at 03:54:37PM +0000, David Howells wrote:
> > > > Eric Biggers <ebiggers3@gmail.com> wrote:
> > > > 
> > > > > Fix it by limiting option strings (combined name + value) to a much more
> > > > > reasonable 128 bytes.  The exact limit is arbitrary, but currently the
> > > > > only recognized option is formatted as "dnserror=%lu" which fits well
> > > > > within this limit.
> > > > 
> > > > There will be more options coming ("ipv4", "ipv6") but they shouldn't overrun
> > > > this limit and we can always extend the limit if need be.
> > > > 
> > > > David
> > > 
> > > David (Howells) do you want to take this patch through the keyrings tree or
> > > should I ask David Miller to take it through net-next?
> > > 
> > > Eric
> > 
> > Ping.
> 
> Ping again.

Times up.  I'll resend for net-next.

Eric

^ permalink raw reply

* [PATCH RESEND net-next v2] KEYS: DNS: limit the length of option strings
From: Eric Biggers @ 2018-04-16 21:29 UTC (permalink / raw)
  To: netdev, David S . Miller; +Cc: keyrings, Mark Rutland, Eric Biggers

From: Eric Biggers <ebiggers@google.com>

Adding a dns_resolver key whose payload contains a very long option name
resulted in that string being printed in full.  This hit the WARN_ONCE()
in set_precision() during the printk(), because printk() only supports a
precision of up to 32767 bytes:

    precision 1000000 too large
    WARNING: CPU: 0 PID: 752 at lib/vsprintf.c:2189 vsnprintf+0x4bc/0x5b0

Fix it by limiting option strings (combined name + value) to a much more
reasonable 128 bytes.  The exact limit is arbitrary, but currently the
only recognized option is formatted as "dnserror=%lu" which fits well
within this limit.

Also ratelimit the printks.

Reproducer:

    perl -e 'print "#", "A" x 1000000, "\x00"' | keyctl padd dns_resolver desc @s

This bug was found using syzkaller.

Reported-by: Mark Rutland <mark.rutland@arm.com>
Fixes: 4a2d789267e0 ("DNS: If the DNS server returns an error, allow that to be cached [ver #2]")
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
 net/dns_resolver/dns_key.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 8396705deffc..40c851693f77 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -91,9 +91,9 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 
 			next_opt = memchr(opt, '#', end - opt) ?: end;
 			opt_len = next_opt - opt;
-			if (!opt_len) {
-				printk(KERN_WARNING
-				       "Empty option to dns_resolver key\n");
+			if (opt_len <= 0 || opt_len > 128) {
+				pr_warn_ratelimited("Invalid option length (%d) for dns_resolver key\n",
+						    opt_len);
 				return -EINVAL;
 			}
 
@@ -127,10 +127,8 @@ dns_resolver_preparse(struct key_preparsed_payload *prep)
 			}
 
 		bad_option_value:
-			printk(KERN_WARNING
-			       "Option '%*.*s' to dns_resolver key:"
-			       " bad/missing value\n",
-			       opt_nlen, opt_nlen, opt);
+			pr_warn_ratelimited("Option '%*.*s' to dns_resolver key: bad/missing value\n",
+					    opt_nlen, opt_nlen, opt);
 			return -EINVAL;
 		} while (opt = next_opt + 1, opt < end);
 	}
-- 
2.17.0.484.g0c8726318c-goog

^ permalink raw reply related

* Re: [PATCH net-next 1/2] PCI: Add two more values for PCIe Max_Read_Request_Size
From: Bjorn Helgaas @ 2018-04-16 21:42 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: Bjorn Helgaas, David Miller, Realtek linux nic maintainers,
	linux-pci@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <ab8696d7-59e0-1f24-0ead-757057eb50cf@gmail.com>

On Mon, Apr 16, 2018 at 09:37:13PM +0200, Heiner Kallweit wrote:
> This patch adds missing values for the max read request size.
> E.g. network driver r8169 uses a value of 4K.
> 
> Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>

I'd prefer a subject line with more details, e.g.,

  PCI: Add #defines for 2K and 4K Max Read Request Size

Acked-by: Bjorn Helgaas <bhelgaas@google.com>

I suspect conflicts are more likely in r8169.c so it might make more
sense to route these through the netdev tree.  I'd also be happy to
take them, so let me know if you want me to take them, David.

> ---
>  include/uapi/linux/pci_regs.h | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
> index 0c79eac5..699257fb 100644
> --- a/include/uapi/linux/pci_regs.h
> +++ b/include/uapi/linux/pci_regs.h
> @@ -506,6 +506,8 @@
>  #define  PCI_EXP_DEVCTL_READRQ_256B  0x1000 /* 256 Bytes */
>  #define  PCI_EXP_DEVCTL_READRQ_512B  0x2000 /* 512 Bytes */
>  #define  PCI_EXP_DEVCTL_READRQ_1024B 0x3000 /* 1024 Bytes */
> +#define  PCI_EXP_DEVCTL_READRQ_2048B 0x4000 /* 2048 Bytes */
> +#define  PCI_EXP_DEVCTL_READRQ_4096B 0x5000 /* 4096 Bytes */
>  #define  PCI_EXP_DEVCTL_BCR_FLR 0x8000  /* Bridge Configuration Retry / FLR */
>  #define PCI_EXP_DEVSTA		10	/* Device Status */
>  #define  PCI_EXP_DEVSTA_CED	0x0001	/* Correctable Error Detected */
> -- 
> 2.17.0
> 
> 

^ permalink raw reply

* [PATCH bpf-next 0/3] Add missing types to bpftool, libbpf
From: Andrey Ignatov @ 2018-04-16 21:41 UTC (permalink / raw)
  To: ast, daniel; +Cc: Andrey Ignatov, netdev, kernel-team

Add support for various BPF prog types and attach types that have been added to
kernel recently but not to bpftool or libbpf yet.


Andrey Ignatov (3):
  bpftool: Add missing prog types and attach types
  libbpf: Support guessing post_bind{4,6} progs
  libbpf: Type functions for raw tracepoints

 tools/bpf/bpftool/cgroup.c | 19 ++++++++++++++++---
 tools/bpf/bpftool/prog.c   |  3 +++
 tools/lib/bpf/libbpf.c     |  8 ++++++++
 tools/lib/bpf/libbpf.h     |  2 ++
 4 files changed, 29 insertions(+), 3 deletions(-)

-- 
2.9.5

^ permalink raw reply

* [PATCH bpf-next 2/3] libbpf: Support guessing post_bind{4,6} progs
From: Andrey Ignatov @ 2018-04-16 21:41 UTC (permalink / raw)
  To: ast, daniel; +Cc: Andrey Ignatov, netdev, kernel-team
In-Reply-To: <cover.1523914232.git.rdna@fb.com>

libbpf can guess prog type and expected attach type based on section
name. Add hints for "cgroup/post_bind4" and "cgroup/post_bind6" section
names.

Existing "cgroup/sock" is not changed, i.e. expected_attach_type for it
is not set to `BPF_CGROUP_INET_SOCK_CREATE`, for backward compatibility.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 tools/lib/bpf/libbpf.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5922443..0fcc447 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1859,6 +1859,9 @@ static void bpf_program__set_expected_attach_type(struct bpf_program *prog,
 
 #define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_FULL(string, ptype, 0)
 
+#define BPF_S_PROG_SEC(string, ptype) \
+	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK, ptype)
+
 #define BPF_SA_PROG_SEC(string, ptype) \
 	BPF_PROG_SEC_FULL(string, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, ptype)
 
@@ -1889,10 +1892,13 @@ static const struct {
 	BPF_SA_PROG_SEC("cgroup/bind6",	BPF_CGROUP_INET6_BIND),
 	BPF_SA_PROG_SEC("cgroup/connect4", BPF_CGROUP_INET4_CONNECT),
 	BPF_SA_PROG_SEC("cgroup/connect6", BPF_CGROUP_INET6_CONNECT),
+	BPF_S_PROG_SEC("cgroup/post_bind4", BPF_CGROUP_INET4_POST_BIND),
+	BPF_S_PROG_SEC("cgroup/post_bind6", BPF_CGROUP_INET6_POST_BIND),
 };
 
 #undef BPF_PROG_SEC
 #undef BPF_PROG_SEC_FULL
+#undef BPF_S_PROG_SEC
 #undef BPF_SA_PROG_SEC
 
 static int bpf_program__identify_section(struct bpf_program *prog)
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 1/3] bpftool: Add missing prog types and attach types
From: Andrey Ignatov @ 2018-04-16 21:41 UTC (permalink / raw)
  To: ast, daniel; +Cc: Andrey Ignatov, netdev, kernel-team
In-Reply-To: <cover.1523914232.git.rdna@fb.com>

Add missing prog types to `bpftool prog` and missing attach types to
`bpftool cgroup`.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 tools/bpf/bpftool/cgroup.c | 19 ++++++++++++++++---
 tools/bpf/bpftool/prog.c   |  3 +++
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cae32a6..8689916 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -16,15 +16,28 @@
 #define HELP_SPEC_ATTACH_FLAGS						\
 	"ATTACH_FLAGS := { multi | override }"
 
-#define HELP_SPEC_ATTACH_TYPES						\
-	"ATTACH_TYPE := { ingress | egress | sock_create | sock_ops | device }"
+#define HELP_SPEC_ATTACH_TYPES						       \
+	"       ATTACH_TYPE := { ingress | egress | sock_create |\n"	       \
+	"                        sock_ops | stream_parser |\n"		       \
+	"                        stream_verdict | device | msg_verdict |\n"    \
+	"                        bind4 | bind6 | connect4 | connect6 |\n"      \
+	"                        post_bind4 | post_bind6 }"
 
 static const char * const attach_type_strings[] = {
 	[BPF_CGROUP_INET_INGRESS] = "ingress",
 	[BPF_CGROUP_INET_EGRESS] = "egress",
 	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
 	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
+	[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
 	[BPF_CGROUP_DEVICE] = "device",
+	[BPF_SK_MSG_VERDICT] = "msg_verdict",
+	[BPF_CGROUP_INET4_BIND] = "bind4",
+	[BPF_CGROUP_INET6_BIND] = "bind6",
+	[BPF_CGROUP_INET4_CONNECT] = "connect4",
+	[BPF_CGROUP_INET6_CONNECT] = "connect6",
+	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
 	[__MAX_BPF_ATTACH_TYPE] = NULL,
 };
 
@@ -282,7 +295,7 @@ static int do_help(int argc, char **argv)
 		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
 		"       %s %s help\n"
 		"\n"
-		"       " HELP_SPEC_ATTACH_TYPES "\n"
+		HELP_SPEC_ATTACH_TYPES "\n"
 		"       " HELP_SPEC_ATTACH_FLAGS "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f7a8108..548adb9 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -68,6 +68,9 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_SOCK_OPS]	= "sock_ops",
 	[BPF_PROG_TYPE_SK_SKB]		= "sk_skb",
 	[BPF_PROG_TYPE_CGROUP_DEVICE]	= "cgroup_device",
+	[BPF_PROG_TYPE_SK_MSG]		= "sk_msg",
+	[BPF_PROG_TYPE_RAW_TRACEPOINT]	= "raw_tracepoint",
+	[BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
 };
 
 static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
-- 
2.9.5

^ permalink raw reply related

* [PATCH bpf-next 3/3] libbpf: Type functions for raw tracepoints
From: Andrey Ignatov @ 2018-04-16 21:41 UTC (permalink / raw)
  To: ast, daniel; +Cc: Andrey Ignatov, netdev, kernel-team
In-Reply-To: <cover.1523914232.git.rdna@fb.com>

Add missing pieces for BPF_PROG_TYPE_RAW_TRACEPOINT in libbpf:
* is- and set- functions;
* support guessing prog type.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
---
 tools/lib/bpf/libbpf.c | 2 ++
 tools/lib/bpf/libbpf.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0fcc447..3d35bac 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1845,6 +1845,7 @@ BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
 BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
 BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
 BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);
+BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);
 BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);
 BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT);
 
@@ -1877,6 +1878,7 @@ static const struct {
 	BPF_PROG_SEC("classifier",	BPF_PROG_TYPE_SCHED_CLS),
 	BPF_PROG_SEC("action",		BPF_PROG_TYPE_SCHED_ACT),
 	BPF_PROG_SEC("tracepoint/",	BPF_PROG_TYPE_TRACEPOINT),
+	BPF_PROG_SEC("raw_tracepoint/",	BPF_PROG_TYPE_RAW_TRACEPOINT),
 	BPF_PROG_SEC("xdp",		BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",	BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("cgroup/skb",	BPF_PROG_TYPE_CGROUP_SKB),
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index a3a62a5..8b24248 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -185,6 +185,7 @@ int bpf_program__nth_fd(struct bpf_program *prog, int n);
  */
 int bpf_program__set_socket_filter(struct bpf_program *prog);
 int bpf_program__set_tracepoint(struct bpf_program *prog);
+int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
 int bpf_program__set_kprobe(struct bpf_program *prog);
 int bpf_program__set_sched_cls(struct bpf_program *prog);
 int bpf_program__set_sched_act(struct bpf_program *prog);
@@ -194,6 +195,7 @@ void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type);
 
 bool bpf_program__is_socket_filter(struct bpf_program *prog);
 bool bpf_program__is_tracepoint(struct bpf_program *prog);
+bool bpf_program__is_raw_tracepoint(struct bpf_program *prog);
 bool bpf_program__is_kprobe(struct bpf_program *prog);
 bool bpf_program__is_sched_cls(struct bpf_program *prog);
 bool bpf_program__is_sched_act(struct bpf_program *prog);
-- 
2.9.5

^ permalink raw reply related

* [PATCH net,stable] tun: fix vlan packet truncation
From: Bjørn Mork @ 2018-04-16 22:00 UTC (permalink / raw)
  To: netdev; +Cc: Bjørn Mork, Jason Wang

Bogus trimming in tun_net_xmit() causes truncated vlan packets.

skb->len is correct whether or not skb_vlan_tag_present() is true. There
is no more reason to adjust the skb length on xmit in this driver than
any other driver. tun_put_user() adds 4 bytes to the total for tagged
packets because it transmits the tag inline to userspace.  This is
similar to a nic transmitting the tag inline on the wire.

Reproducing the bug by sending any tagged packet through back-to-back
connected tap interfaces:

 socat TUN,tun-type=tap,iff-up,tun-name=in TUN,tun-type=tap,iff-up,tun-name=out &
 ip link add link in name in.20 type vlan id 20
 ip addr add 10.9.9.9/24 dev in.20
 ip link set in.20 up
 tshark -nxxi in -f arp -c1 2>/dev/null &
 tshark -nxxi out -f arp -c1 2>/dev/null &
 ping -c 1 10.9.9.5 >/dev/null 2>&1

The output from the 'in' and 'out' interfaces are different when the
bug is present:

 Capturing on 'in'
 0000  ff ff ff ff ff ff 76 cf 76 37 d5 0a 81 00 00 14   ......v.v7......
 0010  08 06 00 01 08 00 06 04 00 01 76 cf 76 37 d5 0a   ..........v.v7..
 0020  0a 09 09 09 00 00 00 00 00 00 0a 09 09 05         ..............

 Capturing on 'out'
 0000  ff ff ff ff ff ff 76 cf 76 37 d5 0a 81 00 00 14   ......v.v7......
 0010  08 06 00 01 08 00 06 04 00 01 76 cf 76 37 d5 0a   ..........v.v7..
 0020  0a 09 09 09 00 00 00 00 00 00                     ..........

Fixes: aff3d70a07ff ("tun: allow to attach ebpf socket filter")
Cc: Jason Wang <jasowang@redhat.com>
Signed-off-by: Bjørn Mork <bjorn@mork.no>
---
 drivers/net/tun.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 28583aa0c17d..01cf8e3d8edc 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1103,13 +1103,6 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	len = run_ebpf_filter(tun, skb, len);
 
-	/* Trim extra bytes since we may insert vlan proto & TCI
-	 * in tun_put_user().
-	 */
-	len -= skb_vlan_tag_present(skb) ? sizeof(struct veth) : 0;
-	if (len <= 0 || pskb_trim(skb, len))
-		goto drop;
-
 	if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC)))
 		goto drop;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH 00/10] New network driver for Amiga X-Surf 100 (m68k)
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev; +Cc: linux-m68k, Michael.Karcher

This patch series adds support for the Individual Computers X-Surf 100
network card for m68k Amiga, a network adapter based on the AX88796 chip set.

The driver was originally written for kernel version 3.19 by Michael Karcher
(see CC:), and adapted to 4.16 for submission to netdev by me. Questions  
regarding motivation for some of the changes are probably best directed at
Michael Karcher.

The driver has been tested by Adrian <glaubitz@physik.fu-berlin.de> who will
send his Tested-by tag separately.

A few changes to the ax88796 driver were required:
- to read the MAC address, some setup of the ax99796 chip must be done,
- attach to the MII bus only on device open to allow module unloading,
- allow to supersede ax_block_input/ax_block_output by card-specific
  optimized code,
- use an optional interrupt status callback to allow easier sharing of the
  card interrupt,
- set IRQF_SHARED if platform IRQ resource is marked shareable,
- add a dummy control register write to MII reset code so back-to-back
  reset requests work.

Some additional cleanup:
- do not attempt to free IRQ in ax_remove (complements 82533ad9a1c),
- unregister and free mdiobus resources in ax_mii_init error path,
- clear platform drvdata on probe fail and module remove.

The patch series, in order:

01/10 net: ax88796: Fix MAC address reading
02/10 net: ax88796: Attach MII bus only when open
03/10 net: ax88796: Do not free IRQ in ax_remove() (already freed in ax_close()).
04/10 net: ax88796: Add block_input/output hooks to ax_plat_data
05/10 net: ax88796: add interrupt status callback to platform data
06/10 net: ax88796: set IRQF_SHARED flag when IRQ resource is marked as shareable
07/10 net: ax88796: unregister mdiobus on ax_mii_init() fail
08/10 net: ax88796: Make reset more robust on AX88796B
09/10 net: ax88796: release platform device drvdata on probe error and module remove
10/10 net: New ax88796 platform driver for Amiga X-Surf 100 Zorro board (m68k)

 drivers/net/ethernet/8390/Kconfig    |   14 +-
 drivers/net/ethernet/8390/Makefile   |    1 +
 drivers/net/ethernet/8390/ax88796.c  |   66 +++++-
 drivers/net/ethernet/8390/xsurf100.c |  411 ++++++++++++++++++++++++++++++++++
 include/net/ax88796.h                |   14 +-
 5 files changed, 495 insertions(+), 11 deletions(-)

Cheers,

	Michael Schmitz

^ permalink raw reply

* [PATCH 01/10] net: ax88796: Fix MAC address reading
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev
  Cc: linux-m68k, Michael.Karcher, Michael Karcher, Michael Karcher,
	Michael Schmitz
In-Reply-To: <1523916285-6057-1-git-send-email-schmitzmic@gmail.com>

From: Michael Karcher <debian@mkarcher.dialup.fu-berlin.de>

To read the MAC address from the (virtual) SAprom, the remote DMA
unit needs to be set up like for every other process access to card-local
memory.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
---
 drivers/net/ethernet/8390/ax88796.c |    6 ++++++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 2455547..2a256aa 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -671,10 +671,16 @@ static int ax_init_dev(struct net_device *dev)
 	if (ax->plat->flags & AXFLG_HAS_EEPROM) {
 		unsigned char SA_prom[32];
 
+		ei_outb(6, ioaddr + EN0_RCNTLO);
+		ei_outb(0, ioaddr + EN0_RCNTHI);
+		ei_outb(0, ioaddr + EN0_RSARLO);
+		ei_outb(0, ioaddr + EN0_RSARHI);
+		ei_outb(E8390_RREAD + E8390_START, ioaddr + NE_CMD);
 		for (i = 0; i < sizeof(SA_prom); i += 2) {
 			SA_prom[i] = ei_inb(ioaddr + NE_DATAPORT);
 			SA_prom[i + 1] = ei_inb(ioaddr + NE_DATAPORT);
 		}
+		ei_outb(ENISR_RDC, ioaddr + EN0_ISR);	/* Ack intr. */
 
 		if (ax->plat->wordlength == 2)
 			for (i = 0; i < 16; i++)
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH 02/10] net: ax88796: Attach MII bus only when open
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev
  Cc: linux-m68k, Michael.Karcher, Michael Karcher, Michael Karcher,
	Michael Schmitz
In-Reply-To: <1523916285-6057-1-git-send-email-schmitzmic@gmail.com>

From: Michael Karcher <debian@mkarcher.dialup.fu-berlin.de>

Call ax_mii_init in ax_open(), and unregister/remove mdiobus resources
in ax_close().

This is needed to be able to unload the module, as the module is busy
while the MII bus is attached.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
---
 drivers/net/ethernet/8390/ax88796.c |   13 +++++++++----
 1 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 2a256aa..f7b8911 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -79,6 +79,8 @@
 
 static u32 ax_msg_enable;
 
+static int ax_mii_init(struct net_device *dev);
+
 /* device private data */
 
 struct ax_device {
@@ -396,6 +398,10 @@ static int ax_open(struct net_device *dev)
 
 	netdev_dbg(dev, "open\n");
 
+	ret = ax_mii_init(dev);
+	if (ret)
+		goto failed_request_irq;
+
 	ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
 			  dev->name, dev);
 	if (ret)
@@ -442,6 +448,9 @@ static int ax_close(struct net_device *dev)
 	phy_disconnect(dev->phydev);
 
 	free_irq(dev->irq, dev);
+
+	mdiobus_unregister(ax->mii_bus);
+	free_mdio_bitbang(ax->mii_bus);
 	return 0;
 }
 
@@ -758,10 +767,6 @@ static int ax_init_dev(struct net_device *dev)
 	dev->netdev_ops = &ax_netdev_ops;
 	dev->ethtool_ops = &ax_ethtool_ops;
 
-	ret = ax_mii_init(dev);
-	if (ret)
-		goto err_out;
-
 	ax_NS8390_init(dev, 0);
 
 	ret = register_netdev(dev);
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH 03/10] net: ax88796: Do not free IRQ in ax_remove() (already freed in ax_close()).
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev
  Cc: linux-m68k, Michael.Karcher, John Paul Adrian Glaubitz,
	Michael Karcher
In-Reply-To: <1523916285-6057-1-git-send-email-schmitzmic@gmail.com>

From: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>

This complements the fix in 82533ad9a1c that removed the free_irq
call in the error path of probe, to also not call free_irq when
remove is called to revert the effects of probe.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
---
 drivers/net/ethernet/8390/ax88796.c |    1 -
 1 files changed, 0 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index f7b8911..a4f23ba 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -791,7 +791,6 @@ static int ax_remove(struct platform_device *pdev)
 	struct resource *mem;
 
 	unregister_netdev(dev);
-	free_irq(dev->irq, dev);
 
 	iounmap(ei_local->mem);
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH 04/10] net: ax88796: Add block_input/output hooks to ax_plat_data
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev; +Cc: linux-m68k, Michael.Karcher, Michael Schmitz, Michael Karcher
In-Reply-To: <1523916285-6057-1-git-send-email-schmitzmic@gmail.com>

Add platform specific hooks for block transfer reads/writes of packet
buffer data, superseding the default provided ax_block_input/output.
Currently used for m68k Amiga XSurf100.

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
---
 drivers/net/ethernet/8390/ax88796.c |   10 ++++++++--
 include/net/ax88796.h               |    9 ++++++++-
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index a4f23ba..9159235 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -758,8 +758,14 @@ static int ax_init_dev(struct net_device *dev)
 #endif
 
 	ei_local->reset_8390 = &ax_reset_8390;
-	ei_local->block_input = &ax_block_input;
-	ei_local->block_output = &ax_block_output;
+	if (ax->plat->block_input)
+		ei_local->block_input = ax->plat->block_input;
+	else
+		ei_local->block_input = &ax_block_input;
+	if (ax->plat->block_output)
+		ei_local->block_output = ax->plat->block_output;
+	else
+		ei_local->block_output = &ax_block_output;
 	ei_local->get_8390_hdr = &ax_get_8390_hdr;
 	ei_local->priv = 0;
 	ei_local->msg_enable = ax_msg_enable;
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index b9a3bec..26cc459 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -8,10 +8,11 @@
  * published by the Free Software Foundation.
  *
 */
-
 #ifndef __NET_AX88796_PLAT_H
 #define __NET_AX88796_PLAT_H
 
+struct net_device;
+
 #define AXFLG_HAS_EEPROM		(1<<0)
 #define AXFLG_MAC_FROMDEV		(1<<1)	/* device already has MAC */
 #define AXFLG_HAS_93CX6			(1<<2)	/* use eeprom_93cx6 driver */
@@ -26,6 +27,12 @@ struct ax_plat_data {
 	u32		*reg_offsets;	/* register offsets */
 	u8		*mac_addr;	/* MAC addr (only used when
 					   AXFLG_MAC_FROMPLATFORM is used */
+
+	/* uses default ax88796 buffer if set to NULL */
+	void (*block_output)(struct net_device *dev, int count,
+			const unsigned char *buf, int star_page);
+	void (*block_input)(struct net_device *dev, int count,
+			struct sk_buff *skb, int ring_offset);
 };
 
 #endif /* __NET_AX88796_PLAT_H */
-- 
1.7.0.4

^ permalink raw reply related

* [PATCH 05/10] net: ax88796: add interrupt status callback to platform data
From: Michael Schmitz @ 2018-04-16 22:04 UTC (permalink / raw)
  To: netdev; +Cc: linux-m68k, Michael.Karcher, Michael Schmitz, Michael Karcher
In-Reply-To: <1523916285-6057-1-git-send-email-schmitzmic@gmail.com>

To be able to tell the ax88796 driver whether it is sensible to enter
the 8390 interrupt handler, an "is this interrupt caused by the 88796"
callback has been added to the ax_plat_data structure (with NULL being
compatible to the previous behaviour).

Signed-off-by: Michael Karcher <kernel@mkarcher.dialup.fu-berlin.de>
Signed-off-by: Michael Schmitz <schmitzmic@gmail.com>
---
 drivers/net/ethernet/8390/ax88796.c |   23 +++++++++++++++++++++--
 include/net/ax88796.h               |    5 +++++
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 9159235..b6d5bec 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -167,6 +167,21 @@ static void ax_reset_8390(struct net_device *dev)
 	ei_outb(ENISR_RESET, addr + EN0_ISR);	/* Ack intr. */
 }
 
+/* Wrapper for __ei_interrupt for platforms that have a platform-specific
+ * way to find out whether the interrupt request might be caused by
+ * the ax88796 chip.
+ */
+static irqreturn_t ax_ei_interrupt_filtered(int irq, void *dev_id)
+{
+	struct net_device *dev = dev_id;
+	struct ax_device *ax = to_ax_dev(dev);
+	struct platform_device *pdev = to_platform_device(dev->dev.parent);
+
+	if (!ax->plat->check_irq(pdev))
+		return IRQ_NONE;
+
+	return ax_ei_interrupt(irq, dev_id);
+}
 
 static void ax_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
 			    int ring_page)
@@ -402,8 +417,12 @@ static int ax_open(struct net_device *dev)
 	if (ret)
 		goto failed_request_irq;
 
-	ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
-			  dev->name, dev);
+	if (ax->plat->check_irq)
+		ret = request_irq(dev->irq, ax_ei_interrupt_filtered,
+				  ax->irqflags, dev->name, dev);
+	else
+		ret = request_irq(dev->irq, ax_ei_interrupt, ax->irqflags,
+				  dev->name, dev);
 	if (ret)
 		goto failed_request_irq;
 
diff --git a/include/net/ax88796.h b/include/net/ax88796.h
index 26cc459..26412cd 100644
--- a/include/net/ax88796.h
+++ b/include/net/ax88796.h
@@ -12,6 +12,7 @@
 #define __NET_AX88796_PLAT_H
 
 struct net_device;
+struct platform_device;
 
 #define AXFLG_HAS_EEPROM		(1<<0)
 #define AXFLG_MAC_FROMDEV		(1<<1)	/* device already has MAC */
@@ -33,6 +34,10 @@ struct ax_plat_data {
 			const unsigned char *buf, int star_page);
 	void (*block_input)(struct net_device *dev, int count,
 			struct sk_buff *skb, int ring_offset);
+	/* returns nonzero if a pending interrupt request might by caused by
+	 * the ax88786. Handles all interrupts if set to NULL
+	 */
+	int (*check_irq)(struct platform_device *pdev);
 };
 
 #endif /* __NET_AX88796_PLAT_H */
-- 
1.7.0.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox