Netdev List
 help / color / mirror / Atom feed
* [PATCH iproute2 0/2] ip rule: merge iprule_flush and add selector support
From: Hangbin Liu @ 2016-09-22  6:28 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Phil Sutter, Hangbin Liu

When merging iprule_flush() and iprule_list_or_save(), the local
rtnl_filter_t variable 'filter' is renamed to filter_fn, because the next
patch uses a global variable named 'filter' to filter nlmsgs.

Hangbin Liu (2):
  ip rule: merge ip rule flush and list, save together
  ip rule: add selector support

 ip/iprule.c        | 295 +++++++++++++++++++++++++++++++++++++++++------------
 man/man8/ip-rule.8 |   6 +-
 2 files changed, 231 insertions(+), 70 deletions(-)

-- 
2.5.5

^ permalink raw reply

* [PATCH iproute2 1/2] ip rule: merge ip rule flush and list, save together
From: Hangbin Liu @ 2016-09-22  6:28 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Phil Sutter, Hangbin Liu
In-Reply-To: <1474525729-2845-1-git-send-email-liuhangbin@gmail.com>

iprule_flush() and iprule_list_or_save() both call
rtnl_wilddump_request() and rtnl_dump_filter(), so merge them
together, just as other files do.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 ip/iprule.c | 121 +++++++++++++++++++++++++++---------------------------------
 1 file changed, 54 insertions(+), 67 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index 70562c5..e18505f 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -27,6 +27,12 @@
 #include "utils.h"
 #include "ip_common.h"
 
+enum list_action {
+	IPRULE_LIST,
+	IPRULE_FLUSH,
+	IPRULE_SAVE,
+};
+
 extern struct rtnl_handle rth;
 
 static void usage(void) __attribute__((noreturn));
@@ -243,24 +249,61 @@ static int save_rule(const struct sockaddr_nl *who,
 	return ret == n->nlmsg_len ? 0 : ret;
 }
 
-static int iprule_list_or_save(int argc, char **argv, int save)
+static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n,
+		      void *arg)
+{
+	struct rtnl_handle rth2;
+	struct rtmsg *r = NLMSG_DATA(n);
+	int len = n->nlmsg_len;
+	struct rtattr *tb[FRA_MAX+1];
+
+	len -= NLMSG_LENGTH(sizeof(*r));
+	if (len < 0)
+		return -1;
+
+	parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
+
+	if (tb[FRA_PRIORITY]) {
+		n->nlmsg_type = RTM_DELRULE;
+		n->nlmsg_flags = NLM_F_REQUEST;
+
+		if (rtnl_open(&rth2, 0) < 0)
+			return -1;
+
+		if (rtnl_talk(&rth2, n, NULL, 0) < 0)
+			return -2;
+
+		rtnl_close(&rth2);
+	}
+
+	return 0;
+}
+
+static int iprule_list_flush_or_save(int argc, char **argv, int action)
 {
-	rtnl_filter_t filter = print_rule;
+	rtnl_filter_t filter_fn;
 	int af = preferred_family;
 
 	if (af == AF_UNSPEC)
 		af = AF_INET;
 
 	if (argc > 0) {
-		fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
-				save ? "save" : "show");
+		fprintf(stderr,
+			"\"ip rule list/flush/save\" does not take any arguments\n");
 		return -1;
 	}
 
-	if (save) {
+	switch (action) {
+	case IPRULE_SAVE:
 		if (save_rule_prep())
 			return -1;
-		filter = save_rule;
+		filter_fn = save_rule;
+		break;
+	case IPRULE_FLUSH:
+		filter_fn = flush_rule;
+		break;
+	default:
+		filter_fn = print_rule;
 	}
 
 	if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
@@ -268,7 +311,7 @@ static int iprule_list_or_save(int argc, char **argv, int save)
 		return 1;
 	}
 
-	if (rtnl_dump_filter(&rth, filter, stdout) < 0) {
+	if (rtnl_dump_filter(&rth, filter_fn, stdout) < 0) {
 		fprintf(stderr, "Dump terminated\n");
 		return 1;
 	}
@@ -511,72 +554,16 @@ static int iprule_modify(int cmd, int argc, char **argv)
 	return 0;
 }
 
-
-static int flush_rule(const struct sockaddr_nl *who, struct nlmsghdr *n,
-		      void *arg)
-{
-	struct rtnl_handle rth2;
-	struct rtmsg *r = NLMSG_DATA(n);
-	int len = n->nlmsg_len;
-	struct rtattr *tb[FRA_MAX+1];
-
-	len -= NLMSG_LENGTH(sizeof(*r));
-	if (len < 0)
-		return -1;
-
-	parse_rtattr(tb, FRA_MAX, RTM_RTA(r), len);
-
-	if (tb[FRA_PRIORITY]) {
-		n->nlmsg_type = RTM_DELRULE;
-		n->nlmsg_flags = NLM_F_REQUEST;
-
-		if (rtnl_open(&rth2, 0) < 0)
-			return -1;
-
-		if (rtnl_talk(&rth2, n, NULL, 0) < 0)
-			return -2;
-
-		rtnl_close(&rth2);
-	}
-
-	return 0;
-}
-
-static int iprule_flush(int argc, char **argv)
-{
-	int af = preferred_family;
-
-	if (af == AF_UNSPEC)
-		af = AF_INET;
-
-	if (argc > 0) {
-		fprintf(stderr, "\"ip rule flush\" does not allow arguments\n");
-		return -1;
-	}
-
-	if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
-		perror("Cannot send dump request");
-		return 1;
-	}
-
-	if (rtnl_dump_filter(&rth, flush_rule, NULL) < 0) {
-		fprintf(stderr, "Flush terminated\n");
-		return 1;
-	}
-
-	return 0;
-}
-
 int do_iprule(int argc, char **argv)
 {
 	if (argc < 1) {
-		return iprule_list_or_save(0, NULL, 0);
+		return iprule_list_flush_or_save(0, NULL, IPRULE_LIST);
 	} else if (matches(argv[0], "list") == 0 ||
 		   matches(argv[0], "lst") == 0 ||
 		   matches(argv[0], "show") == 0) {
-		return iprule_list_or_save(argc-1, argv+1, 0);
+		return iprule_list_flush_or_save(argc-1, argv+1, IPRULE_LIST);
 	} else if (matches(argv[0], "save") == 0) {
-		return iprule_list_or_save(argc-1, argv+1, 1);
+		return iprule_list_flush_or_save(argc-1, argv+1, IPRULE_SAVE);
 	} else if (matches(argv[0], "restore") == 0) {
 		return iprule_restore();
 	} else if (matches(argv[0], "add") == 0) {
@@ -584,7 +571,7 @@ int do_iprule(int argc, char **argv)
 	} else if (matches(argv[0], "delete") == 0) {
 		return iprule_modify(RTM_DELRULE, argc-1, argv+1);
 	} else if (matches(argv[0], "flush") == 0) {
-		return iprule_flush(argc-1, argv+1);
+		return iprule_list_flush_or_save(argc-1, argv+1, IPRULE_FLUSH);
 	} else if (matches(argv[0], "help") == 0)
 		usage();
 
-- 
2.5.5

^ permalink raw reply related

* [PATCH iproute2 2/2] ip rule: add selector support
From: Hangbin Liu @ 2016-09-22  6:28 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Phil Sutter, Hangbin Liu
In-Reply-To: <1474525729-2845-1-git-send-email-liuhangbin@gmail.com>

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
---
 ip/iprule.c        | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++--
 man/man8/ip-rule.8 |   6 +-
 2 files changed, 180 insertions(+), 6 deletions(-)

diff --git a/ip/iprule.c b/ip/iprule.c
index e18505f..42fb6af 100644
--- a/ip/iprule.c
+++ b/ip/iprule.c
@@ -20,6 +20,7 @@
 #include <netinet/ip.h>
 #include <arpa/inet.h>
 #include <string.h>
+#include <linux/if.h>
 #include <linux/fib_rules.h>
 #include <errno.h>
 
@@ -41,7 +42,7 @@ static void usage(void)
 {
 	fprintf(stderr, "Usage: ip rule { add | del } SELECTOR ACTION\n");
 	fprintf(stderr, "       ip rule { flush | save | restore }\n");
-	fprintf(stderr, "       ip rule [ list ]\n");
+	fprintf(stderr, "       ip rule [ list [ SELECTOR ]]\n");
 	fprintf(stderr, "SELECTOR := [ not ] [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK[/MASK] ]\n");
 	fprintf(stderr, "            [ iif STRING ] [ oif STRING ] [ pref NUMBER ] [ l3mdev ]\n");
 	fprintf(stderr, "ACTION := [ table TABLE_ID ]\n");
@@ -55,6 +56,105 @@ static void usage(void)
 	exit(-1);
 }
 
+static struct
+{
+	int not;
+	int l3mdev;
+	int iifmask, oifmask;
+	unsigned int tb;
+	unsigned int tos, tosmask;
+	unsigned int pref, prefmask;
+	unsigned int fwmark, fwmask;
+	char iif[IFNAMSIZ];
+	char oif[IFNAMSIZ];
+	inet_prefix src;
+	inet_prefix dst;
+} filter;
+
+static bool filter_nlmsg(struct nlmsghdr *n, struct rtattr **tb, int host_len)
+{
+	struct rtmsg *r = NLMSG_DATA(n);
+	inet_prefix src = { .family = r->rtm_family };
+	inet_prefix dst = { .family = r->rtm_family };
+	__u32 table;
+
+	if (preferred_family != AF_UNSPEC && r->rtm_family != preferred_family)
+		return false;
+
+	if (filter.prefmask &&
+	    filter.pref ^ (tb[FRA_PRIORITY] ? rta_getattr_u32(tb[FRA_PRIORITY]) : 0))
+		return false;
+	if (filter.not && !(r->rtm_flags & FIB_RULE_INVERT))
+		return false;
+
+	if (filter.src.family) {
+		if (tb[FRA_SRC]) {
+			memcpy(&src.data, RTA_DATA(tb[FRA_SRC]),
+			       (r->rtm_src_len + 7) / 8);
+		}
+		if (filter.src.family != r->rtm_family ||
+		    filter.src.bitlen > r->rtm_src_len ||
+		    inet_addr_match(&src, &filter.src, filter.src.bitlen))
+			return false;
+	}
+
+	if (filter.dst.family) {
+		if (tb[FRA_DST]) {
+			memcpy(&dst.data, RTA_DATA(tb[FRA_DST]),
+			       (r->rtm_dst_len + 7) / 8);
+		}
+		if (filter.dst.family != r->rtm_family ||
+		    filter.dst.bitlen > r->rtm_dst_len ||
+		    inet_addr_match(&dst, &filter.dst, filter.dst.bitlen))
+			return false;
+	}
+
+	if (filter.tosmask && filter.tos ^ r->rtm_tos)
+		return false;
+
+	if (filter.fwmark) {
+		__u32 mark = 0;
+		if (tb[FRA_FWMARK])
+			mark = rta_getattr_u32(tb[FRA_FWMARK]);
+		if (filter.fwmark ^ mark)
+			return false;
+	}
+	if (filter.fwmask) {
+		__u32 mask = 0;
+		if (tb[FRA_FWMASK])
+			mask = rta_getattr_u32(tb[FRA_FWMASK]);
+		if (filter.fwmask ^ mask)
+			return false;
+	}
+
+	if (filter.iifmask) {
+		if (tb[FRA_IFNAME]) {
+			if (strcmp(filter.iif, rta_getattr_str(tb[FRA_IFNAME])) != 0)
+				return false;
+		} else {
+			return false;
+		}
+	}
+
+	if (filter.oifmask) {
+		if (tb[FRA_OIFNAME]) {
+			if (strcmp(filter.oif, rta_getattr_str(tb[FRA_OIFNAME])) != 0)
+				return false;
+		} else {
+			return false;
+		}
+	}
+
+	if (filter.l3mdev && !(tb[FRA_L3MDEV] && rta_getattr_u8(tb[FRA_L3MDEV])))
+		return false;
+
+	table = rtm_get_table(r, tb);
+	if (filter.tb > 0 && filter.tb ^ table)
+		return false;
+
+	return true;
+}
+
 int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 {
 	FILE *fp = (FILE *)arg;
@@ -77,6 +177,9 @@ int print_rule(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 
 	host_len = af_bit_len(r->rtm_family);
 
+	if(!filter_nlmsg(n, tb, host_len))
+		return 0;
+
 	if (n->nlmsg_type == RTM_DELRULE)
 		fprintf(fp, "Deleted ");
 
@@ -287,9 +390,9 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action)
 	if (af == AF_UNSPEC)
 		af = AF_INET;
 
-	if (argc > 0) {
-		fprintf(stderr,
-			"\"ip rule list/flush/save\" does not take any arguments\n");
+	if (action != IPRULE_LIST && argc > 0) {
+		fprintf(stderr, "\"ip rule %s\" does not take any arguments.\n",
+				action == IPRULE_SAVE ? "save" : "flush");
 		return -1;
 	}
 
@@ -306,6 +409,75 @@ static int iprule_list_flush_or_save(int argc, char **argv, int action)
 		filter_fn = print_rule;
 	}
 
+	memset(&filter, 0, sizeof(filter));
+
+	while (argc > 0) {
+		if (matches(*argv, "preference") == 0 ||
+		    matches(*argv, "order") == 0 ||
+		    matches(*argv, "priority") == 0) {
+			__u32 pref;
+			NEXT_ARG();
+			if (get_u32(&pref, *argv, 0))
+				invarg("preference value is invalid\n", *argv);
+			filter.pref = pref;
+			filter.prefmask = 1;
+		} else if (strcmp(*argv, "not") == 0) {
+			filter.not = 1;
+		} else if (strcmp(*argv, "tos") == 0) {
+			__u32 tos;
+			NEXT_ARG();
+			if (rtnl_dsfield_a2n(&tos, *argv))
+				invarg("TOS value is invalid\n", *argv);
+			filter.tos = tos;
+			filter.tosmask = 1;
+		} else if (strcmp(*argv, "fwmark") == 0) {
+			char *slash;
+			__u32 fwmark, fwmask;
+			NEXT_ARG();
+			slash = strchr(*argv, '/');
+			if (slash != NULL)
+				*slash = '\0';
+			if (get_u32(&fwmark, *argv, 0))
+				invarg("fwmark value is invalid\n", *argv);
+			filter.fwmark = fwmark;
+			if (slash) {
+				if (get_u32(&fwmask, slash+1, 0))
+					invarg("fwmask value is invalid\n",
+					       slash+1);
+				filter.fwmask = fwmask;
+			}
+		} else if (strcmp(*argv, "dev") == 0 ||
+			   strcmp(*argv, "iif") == 0) {
+			NEXT_ARG();
+			strncpy(filter.iif, *argv, IFNAMSIZ);
+			filter.iifmask = 1;
+		} else if (strcmp(*argv, "oif") == 0) {
+			NEXT_ARG();
+			strncpy(filter.oif, *argv, IFNAMSIZ);
+			filter.oifmask = 1;
+		} else if (strcmp(*argv, "l3mdev") == 0) {
+			filter.l3mdev = 1;
+		} else if (matches(*argv, "lookup") == 0 ||
+			   matches(*argv, "table") == 0 ) {
+			__u32 tid;
+			NEXT_ARG();
+			if (rtnl_rttable_a2n(&tid, *argv))
+				invarg("table id value is invalid\n", *argv);
+			filter.tb = tid;
+		} else if (matches(*argv, "from") == 0 ||
+			   matches(*argv, "src") == 0) {
+			NEXT_ARG();
+			get_prefix(&filter.src, *argv, af);
+		} else {
+			if (matches(*argv, "dst") == 0 ||
+			    matches(*argv, "to") == 0) {
+				NEXT_ARG();
+			}
+			get_prefix(&filter.dst, *argv, af);
+		}
+		argc--; argv++;
+	}
+
 	if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
 		perror("Cannot send dump request");
 		return 1;
diff --git a/man/man8/ip-rule.8 b/man/man8/ip-rule.8
index 3508d80..ec0e31d 100644
--- a/man/man8/ip-rule.8
+++ b/man/man8/ip-rule.8
@@ -15,7 +15,8 @@ ip-rule \- routing policy database management
 
 .ti -8
 .B  ip rule
-.RB "[ " list " ]"
+.RB "[ " list
+.I "[ " SELECTOR " ]]"
 
 .ti -8
 .B  ip rule
@@ -42,7 +43,8 @@ ip-rule \- routing policy database management
 .B  oif
 .IR STRING " ] [ "
 .B  pref
-.IR NUMBER " ]"
+.IR NUMBER " ] [ "
+.BR l3mdev " ]"
 
 .ti -8
 .IR ACTION " := [ "
-- 
2.5.5

^ permalink raw reply related

* [PATCH nf v4] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack
From: fgao @ 2016-09-22  6:29 UTC (permalink / raw)
  To: pablo, kaber, netfilter-devel, netdev; +Cc: gfree.wind, Gao Feng

From: Gao Feng <fgao@ikuai8.com>

It is valid for a TCP RST packet to have the ack flag unset and an ack
number of zero. But the current seqadj code would adjust the "0" ack to an
invalid ack number. seqadj needs to check the ack flag before adjusting the
ack number for these RST packets.

The following is my test case

The client is 10.26.98.245, with one iptables rule added:
iptables  -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2:
--connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with
tcp-reset
This iptables rule can generate a TCP RST without the ack flag.

server:10.172.135.55
Enable the synproxy with seqadjust by the following iptables rules
iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345
-m tcp --syn -j CT --notrack

iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7
--mss 1460
iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT

The following is my test result.

1. packet trace on client
root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829,
win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7],
length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266,
ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884,
nop,wscale 7], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229,
options [nop,nop,TS val 452367885 ecr 15643479], length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226,
options [nop,nop,TS val 15643479 ecr 452367885], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830,
win 0, length 0

2. seqadj log on server
[62873.867319] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.867644] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.869040] Adjusting sequence number from 3695959830->3695959830,
ack from 0->55618628

To summarize, it is clear that the seqadj code adjusts the 0 ack when it
receives a TCP RST packet without the ack flag.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
---
 v4: Don't invoke nf_ct_sack_adjust when no ack flag
 v3: Add the reproduce steps and packet trace
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 37 ++++++++++++++++++++-----------------
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..80ab429 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 	s32 seqoff, ackoff;
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	struct nf_ct_seqadj *this_way, *other_way;
-	int res;
+	int res = 1;
 
 	this_way  = &seqadj->seq[dir];
 	other_way = &seqadj->seq[!dir];
@@ -184,27 +184,30 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 	else
 		seqoff = this_way->offset_before;
 
-	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
-		  other_way->correction_pos))
-		ackoff = other_way->offset_after;
-	else
-		ackoff = other_way->offset_before;
-
 	newseq = htonl(ntohl(tcph->seq) + seqoff);
-	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
 	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
-				 false);
+	pr_debug("Adjusting sequence number from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq));
+	tcph->seq = newseq;
 
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
-		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
-		 ntohl(newack));
+	if (likely(tcph->ack)) {
+		if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
+			  other_way->correction_pos))
+			ackoff = other_way->offset_after;
+		else
+			ackoff = other_way->offset_before;
 
-	tcph->seq = newseq;
-	tcph->ack_seq = newack;
+		newack = htonl(ntohl(tcph->ack_seq) - ackoff);
+		inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq,
+					 newack, false);
+		pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
+			 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
+			 ntohl(newack));
+		tcph->ack_seq = newack;
+
+		res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+	}
 
-	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
 	spin_unlock_bh(&ct->lock);
 
 	return res;
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH net] net: get rid of a signed integer overflow in ip_idents_reserve()
From: David Miller @ 2016-09-22  6:42 UTC (permalink / raw)
  To: eric.dumazet; +Cc: jiri, netdev
In-Reply-To: <1474419977.23058.53.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Sep 2016 18:06:17 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> Jiri Pirko reported an UBSAN warning happening in ip_idents_reserve()
> 
> [] UBSAN: Undefined behaviour in ./arch/x86/include/asm/atomic.h:156:11
> [] signed integer overflow:
> [] -2117905507 + -695755206 cannot be represented in type 'int'
> 
> Since we do not have uatomic_add_return() yet, use atomic_cmpxchg()
> so that the arithmetics can be done using unsigned int.
> 
> Fixes: 04ca6973f7c1 ("ip: make IP identifiers less predictable")
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Reported-by: Jiri Pirko <jiri@resnulli.us>
> ---
> David, Jiri, I removed the prandom_u32() stuff in favor of a traditional
> loop to meet stable requirements. Thanks !

Applied.

^ permalink raw reply

* Re: [PATCH net-next] MAINTAINERS: Update b44 maintainer.
From: David Miller @ 2016-09-22  6:44 UTC (permalink / raw)
  To: michael.chan; +Cc: netdev, f.fainelli
In-Reply-To: <1474428795-25095-1-git-send-email-michael.chan@broadcom.com>

From: Michael Chan <michael.chan@broadcom.com>
Date: Tue, 20 Sep 2016 23:33:15 -0400

> Taking over as maintainer since Gary Zambrano is no longer working
> for Broadcom.
> 
> Signed-off-by: Michael Chan <michael.chan@broadcom.com>

Applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next] tcp: implement TSQ for retransmits
From: David Miller @ 2016-09-22  6:44 UTC (permalink / raw)
  To: eric.dumazet; +Cc: ncardwell, ycheng, netdev
In-Reply-To: <1474436758.23058.75.camel@edumazet-glaptop3.roam.corp.google.com>

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 20 Sep 2016 22:45:58 -0700

> From: Eric Dumazet <edumazet@google.com>
> 
> We saw sch_fq drops caused by the per flow limit of 100 packets and TCP
> when dealing with large cwnd and bursts of retransmits.
> 
> Even after increasing the limit to 1000, and even after commit
> 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time"),
> we can still have these drops.
> 
> Under certain conditions, TCP can spend a considerable amount of
> time queuing thousands of skbs in a single tcp_xmit_retransmit_queue()
> invocation, incurring latency spikes and stalls of other softirq
> handlers.
> 
> This patch implements TSQ for retransmits, limiting number of packets
> and giving more chance for scheduling packets in both ways.
> 
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> Signed-off-by: Neal Cardwell <ncardwell@google.com>

Applied.

^ permalink raw reply

* Re: pull-request: can 2016-09-21
From: David Miller @ 2016-09-22  6:48 UTC (permalink / raw)
  To: mkl; +Cc: netdev, linux-can, kernel
In-Reply-To: <20160921084355.20972-1-mkl@pengutronix.de>

From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Wed, 21 Sep 2016 10:43:54 +0200

> this is another pull request of one patch for the upcoming linux-4.8 release.
> 
> Marek Vasut fixes the CAN-FD bit rate switch in the ifi driver by configuring
> the transmitter delay.

Pulled, thanks.

^ permalink raw reply

* Re: [patch net-next 3/6] mlxsw: spectrum_router: Use FIB notifications instead of switchdev calls
From: Ido Schimmel @ 2016-09-22  6:51 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, idosch, eladr, yotamg, nogahf, ogerlitz, roopa,
	nikolay, linville, andy, f.fainelli, dsa, jhs, vivien.didelot,
	andrew, ivecera, kaber, john
In-Reply-To: <1474458794-5512-4-git-send-email-jiri@resnulli.us>

On Wed, Sep 21, 2016 at 01:53:11PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> Until now, in order to offload a FIB entry to HW we use switchdev op.
> However that has limits. Mainly in case we need to make the HW aware of
> all route prefixes configured in kernel. HW needs to know those in order
> to properly trap appropriate packets and pass them to the kernel to do
> the forwarding. Abort mechanism is now handled within the mlxsw driver.

FWIW, I think it's smart to move abort into the driver instead of
flushing all the routes from the namespace as before.

> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

[...]

> +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
> +{
> +	char ralue_pl[MLXSW_REG_RALUE_LEN];
> +	struct mlxsw_resources *resources;
> +	struct mlxsw_sp_fib_entry *fib_entry;
> +	struct mlxsw_sp_fib_entry *tmp;
> +	struct mlxsw_sp_vr *vr;
> +	int i;
> +	int err;
> +
> +	resources = mlxsw_core_resources_get(mlxsw_sp->core);
> +	for (i = 0; i < resources->max_virtual_routers; i++) {
> +		vr = &mlxsw_sp->router.vrs[i];
> +		if (!vr->used)
> +			continue;
> +
> +		list_for_each_entry_safe(fib_entry, tmp,
> +					 &vr->fib->entry_list, list) {
> +			fib_info_offload_dec(fib_entry->fi);
> +			mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
> +			mlxsw_sp_fib_entry_remove(fib_entry->vr->fib,
> +						  fib_entry);
> +			mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry);

If we now do the routing in slow path, then maybe it makes sense to also
flush all the neighbour entries and prevent new neighbours from being
programmed into the device?

> +		}
> +	}
> +	mlxsw_sp->router.aborted = true;
> +
> +	mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4,
> +			      MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0);

I'm not sure about that, but the loop above removed all the tables from
the device and now you are using table 0 again. Will this work w/o
binding some tree to it (0?)?

> +	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
> +	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
> +	if (err)
> +		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
> +}

Thanks

^ permalink raw reply

* RE: [RFC v2 06/12] qedr: Add support for QP verbs
From: Amrani, Ram @ 2016-09-22  6:51 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org,
	dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org, Elior, Ariel,
	Kalderon, Michal, Mintz, Yuval, Borundia, Rajesh,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <20160921155509.GC1510-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

> 
> Do you have a git tree?
> 
We don't have a publicly accessible git tree.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH nf v5] netfilter: seqadj: Fix the wrong ack adjust for the RST packet without ack
From: fgao @ 2016-09-22  6:53 UTC (permalink / raw)
  To: pablo, kaber, netfilter-devel, netdev; +Cc: gfree.wind, Gao Feng

From: Gao Feng <fgao@ikuai8.com>

It is valid for a TCP RST packet to have the ack flag unset and an ack
number of zero. But the current seqadj code would adjust the "0" ack to an
invalid ack number. seqadj needs to check the ack flag before adjusting the
ack number for these RST packets.

The following is my test case

The client is 10.26.98.245, with one iptables rule added:
iptables  -I INPUT -p tcp --sport 12345 -m connbytes --connbytes 2:
--connbytes-dir reply --connbytes-mode packets -j REJECT --reject-with
tcp-reset
This iptables rule can generate a TCP RST without the ack flag.

server:10.172.135.55
Enable the synproxy with seqadjust by the following iptables rules
iptables -t raw -A PREROUTING -i eth0 -p tcp -d 10.172.135.55 --dport 12345
-m tcp --syn -j CT --notrack

iptables -A INPUT -i eth0 -p tcp -d 10.172.135.55 --dport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -j SYNPROXY --sack-perm --timestamp --wscale 7
--mss 1460
iptables -A OUTPUT -o eth0 -p tcp -s 10.172.135.55 --sport 12345 -m conntrack
--ctstate INVALID,UNTRACKED -m tcp --tcp-flags SYN,RST,ACK SYN,ACK -j ACCEPT

The following is my test result.

1. packet trace on client
root@routers:/tmp# tcpdump -i eth0 tcp port 12345 -n
tcpdump: verbose output suppressed, use -v or -vv for full protocol decode
listening on eth0, link-type EN10MB (Ethernet), capture size 65535 bytes
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [S], seq 3695959829,
win 29200, options [mss 1460,sackOK,TS val 452367884 ecr 0,nop,wscale 7],
length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [S.], seq 546723266,
ack 3695959830, win 0, options [mss 1460,sackOK,TS val 15643479 ecr 452367884,
nop,wscale 7], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [.], ack 1, win 229,
options [nop,nop,TS val 452367885 ecr 15643479], length 0
IP 10.172.135.55.12345 > 10.26.98.245.45154: Flags [.], ack 1, win 226,
options [nop,nop,TS val 15643479 ecr 452367885], length 0
IP 10.26.98.245.45154 > 10.172.135.55.12345: Flags [R], seq 3695959830,
win 0, length 0

2. seqadj log on server
[62873.867319] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.867644] Adjusting sequence number from 602341895->546723267,
ack from 3695959830->3695959830
[62873.869040] Adjusting sequence number from 3695959830->3695959830,
ack from 0->55618628

To summarize, it is clear that the seqadj code adjusts the 0 ack when it
receives a TCP RST packet without the ack flag.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
---
 v5: Use goto to decrease the patch size
 v4: Don't invoke nf_ct_sack_adjust when no ack flag
 v3: Add the reproduce steps and packet trace
 v2: Regenerate because the first patch is removed
 v1: Initial patch

 net/netfilter/nf_conntrack_seqadj.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..08d0640 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 	s32 seqoff, ackoff;
 	struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
 	struct nf_ct_seqadj *this_way, *other_way;
-	int res;
+	int res = 1;
 
 	this_way  = &seqadj->seq[dir];
 	other_way = &seqadj->seq[!dir];
@@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb,
 	else
 		seqoff = this_way->offset_before;
 
+	newseq = htonl(ntohl(tcph->seq) + seqoff);
+	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+	pr_debug("Adjusting sequence number from %u->%u\n",
+		 ntohl(tcph->seq), ntohl(newseq));
+	tcph->seq = newseq;
+
+	if (unlikely(!tcph->ack))
+		goto out;
+
 	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
 		  other_way->correction_pos))
 		ackoff = other_way->offset_after;
 	else
 		ackoff = other_way->offset_before;
 
-	newseq = htonl(ntohl(tcph->seq) + seqoff);
 	newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
-	inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
 	inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
 				 false);
-
-	pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+	pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
 		 ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
 		 ntohl(newack));
-
-	tcph->seq = newseq;
 	tcph->ack_seq = newack;
 
 	res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+out:
 	spin_unlock_bh(&ct->lock);
 
 	return res;
-- 
1.9.1


^ permalink raw reply related

* Re: [PATCH net-next V2 0/8] mlx5e XDP support
From: David Miller @ 2016-09-22  6:55 UTC (permalink / raw)
  To: tariqt; +Cc: netdev, eranbe, ranas, saeedm
In-Reply-To: <1474449589-27035-1-git-send-email-tariqt@mellanox.com>

From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 21 Sep 2016 12:19:41 +0300

> This series adds XDP support in mlx5e driver.
> This includes the use cases: XDP_DROP, XDP_PASS, and XDP_TX.
> 
> Single stream performance tests show 16.5 Mpps for XDP_DROP,
> and 12.4 Mpps for XDP_TX, with nice scalability for multiple streams/rings.
> 
> This rate of XDP_DROP is lower than the 32 Mpps we got in previous
> implementation, when Striding RQ was used.
> 
> We moved to non-Striding RQ, as some XDP_TX requirements (like headroom,
> packet-per-page) cannot be satisfied with the current Striding RQ HW,
> and we decided to fully support both DROP/TX.
> 
> A few directions are being considered in order to enable the faster rate for
> XDP_DROP, e.g. a possibility for users to enable Striding RQ so they can choose
> optimized XDP_DROP at the price of partial XDP_TX functionality, or some HW
> changes.
> 
> Series generated against net-next commit:
> cf714ac147e0 'ipvlan: Fix dependency issue'

Series applied, thanks.

^ permalink raw reply

* Re: pull request (net): ipsec 2016-09-21
From: David Miller @ 2016-09-22  6:56 UTC (permalink / raw)
  To: steffen.klassert; +Cc: herbert, netdev
In-Reply-To: <1474455946-674-1-git-send-email-steffen.klassert@secunet.com>

From: Steffen Klassert <steffen.klassert@secunet.com>
Date: Wed, 21 Sep 2016 13:05:42 +0200

> 1) Propagate errors on security context allocation.
>    From Mathias Krause.
> 
> 2) Fix inbound policy checks for inter address family tunnels.
>    From Thomas Zeitlhofer.
> 
> 3) Fix an old memory leak on aead algorithm usage.
>    From Ilan Tayari.
> 
> 4) A recent patch fixed a possible NULL pointer dereference
>    but broke the vti6 input path.
>    Fix from Nicolas Dichtel.
> 
> Please pull or let me know if there are problems.

Pulled, thanks a lot Steffen.

^ permalink raw reply

* Re: [PATCH next v3 0/2] Rename WORD_TRUNC/ROUND macros and use them
From: David Miller @ 2016-09-22  7:13 UTC (permalink / raw)
  To: marcelo.leitner; +Cc: netdev, linux-sctp, nhorman, vyasevich, David.Laight
In-Reply-To: <cover.1474457954.git.marcelo.leitner@gmail.com>

From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Wed, 21 Sep 2016 08:45:54 -0300

> This patchset aims to rename these macros to less confusing names, as
> reported by David Laight and David Miller, and to update the remaining
> places to make use of them; there was one last remaining spot.
> 
> v3:
> - Name it SCTP_PAD4 instead of SCTP_ALIGN4, as suggested by David Laight
> v2:
> - fixed 2nd patch summary
> 
> Details on the specific changelogs.

Looks good, applied, thanks!

^ permalink raw reply

* Re: [PATCH -next] net: dsa: qca8k: fix non static symbol warning
From: David Miller @ 2016-09-22  7:17 UTC (permalink / raw)
  To: weiyj.lk; +Cc: john, weiyongjun1, netdev
In-Reply-To: <1474470283-31534-1-git-send-email-weiyj.lk@gmail.com>

From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Wed, 21 Sep 2016 15:04:43 +0000

> From: Wei Yongjun <weiyongjun1@huawei.com>
> 
> Fixes the following sparse warning:
> 
> drivers/net/dsa/qca8k.c:259:22: warning:
>  symbol 'qca8k_regmap_config' was not declared. Should it be static?
> 
> Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>

Applied.

^ permalink raw reply

* Re: [PATCH -next] cxgb4: Convert to use simple_open()
From: David Miller @ 2016-09-22  7:18 UTC (permalink / raw)
  To: weiyj.lk; +Cc: hariprasad, weiyongjun1, netdev
In-Reply-To: <1474470556-2647-1-git-send-email-weiyj.lk@gmail.com>

From: Wei Yongjun <weiyj.lk@gmail.com>
Date: Wed, 21 Sep 2016 15:09:16 +0000

> From: Wei Yongjun <weiyongjun1@huawei.com>
> 
> Remove an open coded simple_open() function and replace file
> operations references to the function with simple_open()
> instead.
> 
> Generated by: scripts/coccinelle/api/simple_open.cocci
> 
> Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>

Applied.

^ permalink raw reply

* Re: [patch net-next 5/6] switchdev: remove FIB offload infrastructure
From: Ido Schimmel @ 2016-09-22  7:25 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, idosch, eladr, yotamg, nogahf, ogerlitz, roopa,
	nikolay, linville, andy, f.fainelli, dsa, jhs, vivien.didelot,
	andrew, ivecera, kaber, john
In-Reply-To: <1474458794-5512-6-git-send-email-jiri@resnulli.us>

On Wed, Sep 21, 2016 at 01:53:13PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> Since this is now taken care of by FIB notifier, remove the code, with
> all unused dependencies.
> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>

[...]

> -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi)
> -{
> -	struct switchdev_attr attr = {
> -		.id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID,
> -	};
> -	struct switchdev_attr prev_attr;
> -	struct net_device *dev = NULL;
> -	int nhsel;
> -
> -	ASSERT_RTNL();
> -
> -	/* For this route, all nexthop devs must be on the same switch. */
> -
> -	for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
> -		const struct fib_nh *nh = &fi->fib_nh[nhsel];
> -
> -		if (!nh->nh_dev)
> -			return NULL;
> -
> -		dev = switchdev_get_lowest_dev(nh->nh_dev);
> -		if (!dev)
> -			return NULL;
> -
> -		attr.orig_dev = dev;
> -		if (switchdev_port_attr_get(dev, &attr))
> -			return NULL;
> -
> -		if (nhsel > 0 &&
> -		    !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid))
> -				return NULL;
> -
> -		prev_attr = attr;
> -	}
> -
> -	return dev;
> -}

[...]

> -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
> -			   u8 tos, u8 type, u32 nlflags, u32 tb_id)
> -{
> -	struct switchdev_obj_ipv4_fib ipv4_fib = {
> -		.obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB,
> -		.dst = dst,
> -		.dst_len = dst_len,
> -		.fi = fi,
> -		.tos = tos,
> -		.type = type,
> -		.nlflags = nlflags,
> -		.tb_id = tb_id,
> -	};
> -	struct net_device *dev;
> -	int err = 0;
> -
> -	/* Don't offload route if using custom ip rules or if
> -	 * IPv4 FIB offloading has been disabled completely.
> -	 */
> -
> -#ifdef CONFIG_IP_MULTIPLE_TABLES
> -	if (fi->fib_net->ipv4.fib_has_custom_rules)
> -		return 0;
> -#endif
> -
> -	if (fi->fib_net->ipv4.fib_offload_disabled)
> -		return 0;
> -
> -	dev = switchdev_get_dev_by_nhs(fi);

Since this is now removed I believe we should perform this check inside
the drivers. For mlxsw we can simply iterate over the nexthops and make
sure each has a RIF.

> -	if (!dev)
> -		return 0;
> -
> -	ipv4_fib.obj.orig_dev = dev;
> -	err = switchdev_port_obj_add(dev, &ipv4_fib.obj);
> -	if (!err)
> -		fib_info_offload_inc(fi);
> -
> -	return err == -EOPNOTSUPP ? 0 : err;
> -}

^ permalink raw reply

* Re: [patch net-next 6/6] doc: update switchdev L3 section
From: Ido Schimmel @ 2016-09-22  7:31 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: netdev, davem, idosch, eladr, yotamg, nogahf, ogerlitz, roopa,
	nikolay, linville, andy, f.fainelli, dsa, jhs, vivien.didelot,
	andrew, ivecera, kaber, john
In-Reply-To: <1474458794-5512-7-git-send-email-jiri@resnulli.us>

On Wed, Sep 21, 2016 at 01:53:14PM +0200, Jiri Pirko wrote:
> From: Jiri Pirko <jiri@mellanox.com>
> 
> This is to reflect the change of FIB offload infrastructure from
> switchdev objects to FIB notifier.
> 
> Signed-off-by: Jiri Pirko <jiri@mellanox.com>
> ---
>  Documentation/networking/switchdev.txt | 27 ++++++++++++++-------------
>  1 file changed, 14 insertions(+), 13 deletions(-)
> 
> diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
> index 44235e8..c956ab8 100644
> --- a/Documentation/networking/switchdev.txt
> +++ b/Documentation/networking/switchdev.txt
> @@ -314,30 +314,29 @@ the kernel, with the device doing the FIB lookup and forwarding.  The device
>  does a longest prefix match (LPM) on FIB entries matching route prefix and
>  forwards the packet to the matching FIB entry's nexthop(s) egress ports.
>  
> -To program the device, the driver implements support for
> -SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops.
> -switchdev_port_obj_add is used for both adding a new FIB entry to the device,
> -or modifying an existing entry on the device.
> +To program the device, the driver has to register a FIB notifier handler
> +using register_fib_notifier. There are following events available:

"The following events are available:" maybe?

> +FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device,
> +                     or modifying an existing entry on the device.
> +FIB_EVENT_ENTRY_DEL: used for removing a FIB entry
> +FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes
>  
> -XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported.
> +FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass:
>  
> -SWITCHDEV_OBJ_ID_IPV4_FIB object passes:
> -
> -	struct switchdev_obj_ipv4_fib {         /* IPV4_FIB */
> +	struct fib_entry_notifier_info {
> +		struct fib_notifier_info info; /* must be first */
>  		u32 dst;
>  		int dst_len;
>  		struct fib_info *fi;
>  		u8 tos;
>  		u8 type;
> -		u32 nlflags;
>  		u32 tb_id;
> -	} ipv4_fib;
> +		u32 nlflags;
> +	};
>  
>  to add/modify/delete IPv4 dst/dest_len prefix on table tb_id.  The *fi
>  structure holds details on the route and route's nexthops.  *dev is one of the
> -port netdevs mentioned in the routes next hop list.  If the output port netdevs
> -referenced in the route's nexthop list don't all have the same switch ID, the
> -driver is not called to add/modify/delete the FIB entry.
> +port netdevs mentioned in the routes next hop list.

s/routes/route's/ ?

Reviewed-by: Ido Schimmel <idosch@mellanox.com>

Thanks!

>  
>  Routes offloaded to the device are labeled with "offload" in the ip route
>  listing:
> @@ -355,6 +354,8 @@ listing:
>  	12.0.0.4 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
>  	192.168.0.0/24 dev eth0  proto kernel  scope link  src 192.168.0.15
>  
> +The "offload" flag is set in case at least one device offloads the FIB entry.
> +
>  XXX: add/mod/del IPv6 FIB API
>  
>  Nexthop Resolution
> -- 
> 2.5.5
> 

^ permalink raw reply

* Re: [PATCH net-next v2 0/6] ftgmac100 support for ast2500
From: David Miller @ 2016-09-22  7:31 UTC (permalink / raw)
  To: joel; +Cc: clg, gwshan, andrew, andrew, netdev, linux-kernel, benh
In-Reply-To: <20160921230503.23309-1-joel@jms.id.au>

From: Joel Stanley <joel@jms.id.au>
Date: Thu, 22 Sep 2016 08:34:57 +0930

> Hello Dave,
> 
> This series adds support to the ftgmac100 driver for the Aspeed ast2400 and
> ast2500 SoCs. In particular, they ensure the driver works correctly on the
> ast2500 where the MAC block has seen some changes in register layout.
> 
> They have been tested on ast2400 and ast2500 systems with the NCSI stack and
> with a directly attached PHY.
> 
> V2 reworks the two patches relating to PHYSTS_CHG into the one patch that
> disables the interrupt instead of playing with interrupt sensitivity. I kept
> patch 4 'net/faraday: Clear stale interrupts' which was first introduced to
> clear the stale PHYSTS_CHG interrupt, as it helps keep us safe from unhygienic
> (vendor) bootloaders.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net] tcp: fix under-accounting retransmit SNMP counters
From: David Miller @ 2016-09-22  7:33 UTC (permalink / raw)
  To: ycheng; +Cc: netdev, edumazet
In-Reply-To: <1474499775-26436-1-git-send-email-ycheng@google.com>

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 21 Sep 2016 16:16:14 -0700

> This patch fixes these under-accounting SNMP rtx stats
> LINUX_MIB_TCPFORWARDRETRANS
> LINUX_MIB_TCPFASTRETRANS
> LINUX_MIB_TCPSLOWSTARTRETRANS
> when retransmitting TSO packets
> 
> Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time")
> Signed-off-by: Yuchung Cheng <ycheng@google.com>

Applied.

^ permalink raw reply

* Re: [PATCH net] tcp: properly account Fast Open SYN-ACK retrans
From: David Miller @ 2016-09-22  7:33 UTC (permalink / raw)
  To: ycheng; +Cc: netdev, edumazet, ncardwell, soheil
In-Reply-To: <1474499775-26436-2-git-send-email-ycheng@google.com>

From: Yuchung Cheng <ycheng@google.com>
Date: Wed, 21 Sep 2016 16:16:15 -0700

> Since the TFO socket is accepted right off SYN-data, the socket
> owner can call getsockopt(TCP_INFO) to collect ongoing SYN-ACK
> retransmission or timeout stats (i.e., tcpi_total_retrans,
> tcpi_retransmits). Currently those stats are only updated
> once the handshake completes. This patch fixes it.
> 
> Signed-off-by: Yuchung Cheng <ycheng@google.com>
> Signed-off-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>

Applied.

^ permalink raw reply

* [GIT] Networking
From: David Miller @ 2016-09-22  7:52 UTC (permalink / raw)
  To: torvalds; +Cc: akpm, netdev, linux-kernel


Mostly small bits scattered all over the place, which is usually
how things go this late in the -rc series.

1) Proper driver init device resets in bnx2, from Baoquan He.

2) Fix accounting overflow in __tcp_retransmit_skb(), sk_forward_alloc,
   and ip_idents_reserve, from Eric Dumazet.

3) Fix crash in bna driver ethtool stats handling, from Ivan Vecera.

4) Missing check of skb_linearize() return value in mac80211, from
   Johannes Berg.

5) Endianness fix in nf_table_trace dumps, from Liping Zhang.

6) SSN comparison fix in SCTP, from Marcelo Ricardo Leitner.

7) Update DSA and b44 MAINTAINERS entries.

8) Make input path of vti6 driver work again, from Nicolas Dichtel.

9) Off-by-one in mlx4, from Sebastian Ott.

10) Fix fallback route lookup handling in ipv6, from Vincent Bernat.

11) Fix stack corruption on probe in qed driver, from Yuval Mintz.

12) PHY init fixes in r8152 from Hayes Wang.

13) Missing SKB free in irda_accept error path, from Phil Turnbull.

Please pull, thanks a lot!

The following changes since commit da499f8f5385c181e29978fdaab15a58de185302:

  Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net (2016-09-12 07:56:06 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/davem/net.git 

for you to fetch changes up to 7e32b44361abc77fbc01f2b97b045c405b2583e5:

  tcp: properly account Fast Open SYN-ACK retrans (2016-09-22 03:33:01 -0400)

----------------------------------------------------------------
Andrew Lunn (1):
      MAINTAINERS: Add an entry for the core network DSA code

Baoquan He (1):
      bnx2: Reset device during driver initialization

Beni Lev (1):
      iwlwifi: mvm: update TX queue before making a copy of the skb

Christophe Jaillet (1):
      drivers: net: phy: xgene: Fix 'remove' function

David S. Miller (11):
      Merge git://git.kernel.org/.../pablo/nf
      Merge tag 'mac80211-for-davem-2016-09-13' of git://git.kernel.org/.../jberg/mac80211
      Merge tag 'batadv-net-for-davem-20160914' of git://git.open-mesh.org/linux-merge
      Merge branch 'qeth-fixes'
      Merge tag 'mac80211-for-davem-2016-09-16' of git://git.kernel.org/.../jberg/mac80211
      Merge branch 'mlx5-fixes'
      Merge tag 'linux-can-fixes-for-4.8-20160919' of git://git.kernel.org/.../mkl/linux-can
      Merge branch 'r8152-phy-fixes'
      Merge tag 'wireless-drivers-for-davem-2016-09-20' of git://git.kernel.org/.../kvalo/wireless-drivers
      Merge tag 'linux-can-fixes-for-4.8-20160921' of git://git.kernel.org/.../mkl/linux-can
      Merge branch 'master' of git://git.kernel.org/.../klassert/ipsec

Eric Dumazet (3):
      tcp: fix overflow in __tcp_retransmit_skb()
      net: avoid sk_forward_alloc overflows
      net: get rid of an signed integer overflow in ip_idents_reserve()

Fabio Estevam (1):
      can: flexcan: fix resume function

Felix Fietkau (2):
      mac80211: fix tim recalculation after PS response
      mac80211: fix sequence number assignment for PS response frames

Filipe Manco (1):
      xen-netback: fix error handling on netback_probe()

Gao Feng (1):
      netfilter: synproxy: Check oom when adding synproxy and seqadj ct extensions

Giuseppe CAVALLARO (1):
      stmmac: fix PWRDWN into the PMT register for global unicast.

Hans Wippel (1):
      qeth: restore device features after recovery

Hariprasad Shenai (1):
      cxgb4/cxgb4vf: Allocate more queues for 25G and 100G adapter

Ilan Tayari (1):
      xfrm: Fix memory leak of aead algorithm name

Ivan Mikhaylov (2):
      net/ibm/emac: add set mac addr callback
      net/ibm/emac: add mutex to 'set multicast list'

Ivan Vecera (2):
      bna: add missing per queue ethtool stat
      bna: fix crash in bnad_get_strings()

Johannes Berg (3):
      nl80211: validate number of probe response CSA counters
      mac80211: check skb_linearize() return value
      mac80211: reject TSPEC TIDs (TSIDs) for aggregation

Kalle Valo (1):
      Merge tag 'iwlwifi-for-kalle-2016-09-15' of git://git.kernel.org/.../iwlwifi/iwlwifi-fixes

Kamal Heib (1):
      net/mlx4_core: Fix to clean devlink resources

Linus Lüssing (1):
      batman-adv: fix elp packet data reservation

Liping Zhang (2):
      netfilter: nf_tables_trace: fix endiness when dump chain policy
      netfilter: nft_chain_route: re-route before skb is queued to userspace

Marcelo Ricardo Leitner (1):
      sctp: fix SSN comparision

Marek Vasut (1):
      net: can: ifi: Configure transmitter delay

Mark Tomlinson (1):
      net: VRF: Pass original iif to ip_route_input()

Mathias Krause (1):
      xfrm_user: propagate sec ctx allocation errors

Michael Chan (1):
      MAINTAINERS: Update b44 maintainer.

Nicolas Dichtel (1):
      vti6: fix input path

Nikolay Aleksandrov (1):
      ipmr, ip6mr: return lastuse relative to now

Or Gerlitz (2):
      net/mlx5: E-Switch, Fix error flow in the SRIOV e-switch init code
      net/mlx5: E-Switch, Handle mode change failures

Pablo Neira Ayuso (1):
      netfilter: nf_nat: handle NF_DROP from nfnetlink_parse_nat_setup()

Pedersen, Thomas (1):
      mac80211: make mpath path fixing more robust

Roi Dayan (1):
      net/mlx5: Fix flow counter bulk command out mailbox allocation

Sean Wang (1):
      net: ethernet: mediatek: fix module loading automatically based on MODULE_DEVICE_TABLE

Sebastian Ott (1):
      net/mlx4_en: fix off by one in error handling

Sven Eckelmann (1):
      batman-adv: Add missing refcnt for last_candidate

Thomas Richter (1):
      s390/qeth: fix setting VIPA address

Ursula Braun (5):
      s390/qeth: use ip_lock for hsuid configuration
      s390/qeth: allow hsuid configuration in DOWN state
      qeth: check not more than 16 SBALEs on the completion queue
      qeth: do not limit number of gso segments
      qeth: do not turn on SG per default

Vincent Bernat (1):
      net: ipv6: fallback to full lookup if table lookup is unsuitable

Wei Yongjun (1):
      nfp: fix error return code in nfp_net_netdev_open()

Xin Long (1):
      sctp: hold the transport before using it in sctp_hash_cmp

Yuchung Cheng (2):
      tcp: fix under-accounting retransmit SNMP counters
      tcp: properly account Fast Open SYN-ACK retrans

Yuval Mintz (1):
      qed: Fix stack corruption on probe

hayeswang (5):
      r8152: move some functions
      r8152: move enabling PHY
      r8152: move PHY settings to hw_phy_cfg
      r8152: remove r8153_enable_eee
      r8152: disable ALDPS and EEE before setting PHY

phil.turnbull@oracle.com (1):
      irda: Free skb on irda_accept error path.

thomas.zeitlhofer+lkml@ze-it.at (1):
      vti: use right inner_mode for inbound inter address family policy checks

 MAINTAINERS                                                |  11 +-
 drivers/net/can/flexcan.c                                  |  13 ++-
 drivers/net/can/ifi_canfd/ifi_canfd.c                      |  11 +-
 drivers/net/ethernet/broadcom/bnx2.c                       |  12 ++-
 drivers/net/ethernet/brocade/bna/bnad_ethtool.c            |  57 ++++++-----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h                 |   4 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c            |  15 ++-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c                 |   7 +-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h              |   6 ++
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h         |  15 ++-
 drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c             |   9 +-
 drivers/net/ethernet/ibm/emac/core.c                       |  34 ++++++-
 drivers/net/ethernet/mediatek/mtk_eth_soc.c                |   1 +
 drivers/net/ethernet/mellanox/mlx4/eq.c                    |   4 +-
 drivers/net/ethernet/mellanox/mlx4/main.c                  |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c          |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c |  20 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c           |   4 +-
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c        |   8 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c                  |   4 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c       |   2 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c          |   2 +-
 drivers/net/phy/mdio-xgene.c                               |   6 +-
 drivers/net/usb/r8152.c                                    | 281 ++++++++++++++++++++++++++-------------------------
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c                |  19 ++--
 drivers/net/xen-netback/xenbus.c                           |  46 ++++++---
 drivers/s390/net/qeth_core.h                               |   1 +
 drivers/s390/net/qeth_core_main.c                          |  32 +++++-
 drivers/s390/net/qeth_l2_main.c                            |   6 +-
 drivers/s390/net/qeth_l3_main.c                            |  29 ++++--
 drivers/s390/net/qeth_l3_sys.c                             |   5 +
 include/net/netfilter/nf_conntrack_synproxy.h              |  14 +++
 include/net/sctp/sm.h                                      |   2 +-
 include/net/sock.h                                         |  10 ++
 include/net/xfrm.h                                         |   4 +-
 net/batman-adv/bat_v_elp.c                                 |   2 +-
 net/batman-adv/routing.c                                   |  28 ++++-
 net/ipv4/ip_input.c                                        |   5 +-
 net/ipv4/ip_vti.c                                          |  15 ++-
 net/ipv4/ipmr.c                                            |   7 +-
 net/ipv4/netfilter/nft_chain_route_ipv4.c                  |  11 +-
 net/ipv4/route.c                                           |  10 +-
 net/ipv4/tcp_input.c                                       |   2 +-
 net/ipv4/tcp_output.c                                      |   7 +-
 net/ipv4/tcp_timer.c                                       |   1 +
 net/ipv6/ip6_vti.c                                         |  19 +++-
 net/ipv6/ip6mr.c                                           |   7 +-
 net/ipv6/netfilter/nft_chain_route_ipv6.c                  |  10 +-
 net/ipv6/route.c                                           |  11 +-
 net/ipv6/xfrm6_input.c                                     |  16 ++-
 net/ipv6/xfrm6_tunnel.c                                    |   2 +-
 net/irda/af_irda.c                                         |   5 +-
 net/mac80211/agg-rx.c                                      |   8 +-
 net/mac80211/agg-tx.c                                      |   3 +
 net/mac80211/mesh_hwmp.c                                   |   3 +-
 net/mac80211/mesh_pathtbl.c                                |   2 +-
 net/mac80211/sta_info.c                                    |   4 +-
 net/mac80211/tx.c                                          |  73 ++++++-------
 net/netfilter/nf_conntrack_core.c                          |   6 +-
 net/netfilter/nf_nat_core.c                                |   5 +-
 net/netfilter/nf_tables_trace.c                            |   2 +-
 net/sctp/input.c                                           |  27 +++--
 net/wireless/nl80211.c                                     |   2 +-
 net/xfrm/xfrm_state.c                                      |   1 +
 net/xfrm/xfrm_user.c                                       |   9 +-
 65 files changed, 658 insertions(+), 343 deletions(-)

^ permalink raw reply

* Re: [PATCH ipsec-next] xfrm: state lookup can be lockless
From: Steffen Klassert @ 2016-09-22  7:52 UTC (permalink / raw)
  To: Florian Westphal; +Cc: netdev
In-Reply-To: <1474379126-32550-1-git-send-email-fw@strlen.de>

On Tue, Sep 20, 2016 at 03:45:26PM +0200, Florian Westphal wrote:
> This is called from the packet input path, we get lock contention
> if many cpus handle ipsec in parallel.
> 
> After recent rcu conversion it is safe to call __xfrm_state_lookup
> without the spinlock.
> 
> Signed-off-by: Florian Westphal <fw@strlen.de>

Applied to ipsec-next, thanks a lot!

^ permalink raw reply

* Re: [PATCH net-next 0/9] rxrpc: Preparation for slow-start algorithm
From: David Howells @ 2016-09-22  8:10 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel
In-Reply-To: <147450474784.14691.229861132515739820.stgit@warthog.procyon.org.uk>

I'm going to post a V2 for this.  I've used a couple of 64-bit division
operators rather than calling the appropriate function (which is fine on
x86_64) and managed to transpose the last two patches (causing an undefined
symbol in one of them).

David

^ permalink raw reply

* [PATCH net-next 0/9] rxrpc: Preparation for slow-start algorithm [ver #2]
From: David Howells @ 2016-09-22  8:22 UTC (permalink / raw)
  To: netdev; +Cc: dhowells, linux-afs, linux-kernel


Here are some patches that prepare for improvements in ACK generation and
for the implementation of the slow-start part of the protocol:

 (1) Stop storing the protocol header in the Tx socket buffers, but rather
     generate it on the fly.  This potentially saves a little space and
     makes it easier to alter the header just before transmission (the
     flags may get altered and the serial number has to be changed).

 (2) Mask off the Tx buffer annotations and add a flag to record which ones
     have already been resent.

 (3) Track RTT on a per-peer basis for use in future changes.  Tracepoints
     are added to log this.

 (4) Send PING ACKs in response to incoming calls to elicit a PING-RESPONSE
     ACK from which RTT data can be calculated.  The response also carries
     other useful information.

 (5) Expedite PING-RESPONSE ACK generation from sendmsg.  If we're actively
     using sendmsg, this allows us, under some circumstances, to avoid
     having to rely on the background work item to run to generate this
     ACK.

     This requires ktime_sub_ms() to be added.

 (6) Set the REQUEST-ACK flag on some DATA packets to elicit ACK-REQUESTED
     ACKs from which RTT data can be calculated.

 (7) Limit the use of pings and ACK requests for RTT determination.

Changes:

 (V2) Don't use the C division operator for 64-bit division.  One instance
      should use do_div() and the other should be using nsecs_to_jiffies().

      The last two patches got transposed, leading to an undefined symbol
      in one of them.

      Reported-by: kbuild test robot <lkp@intel.com>

The patches can be found here also:

	http://git.kernel.org/cgit/linux/kernel/git/dhowells/linux-fs.git/log/?h=rxrpc-rewrite

Tagged thusly:

	git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs.git
	rxrpc-rewrite-20160922-v2

David
---
David Howells (9):
      rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs
      rxrpc: Add re-sent Tx annotation
      rxrpc: Add per-peer RTT tracker
      rxrpc: Send pings to get RTT data
      rxrpc: Expedite ping response transmission
      rxrpc: Add ktime_sub_ms()
      rxrpc: Obtain RTT data by requesting ACKs on DATA packets
      rxrpc: Reduce the number of ACK-Requests sent
      rxrpc: Reduce the number of PING ACKs sent


 include/linux/ktime.h        |    5 ++
 include/trace/events/rxrpc.h |   61 ++++++++++++++++++++++
 net/rxrpc/ar-internal.h      |   47 ++++++++++++-----
 net/rxrpc/call_event.c       |   56 ++++++++++----------
 net/rxrpc/conn_object.c      |    1 
 net/rxrpc/input.c            |  100 ++++++++++++++++++++++++++++++++++--
 net/rxrpc/misc.c             |   25 ++++++---
 net/rxrpc/output.c           |  117 ++++++++++++++++++++++++++++++++----------
 net/rxrpc/peer_event.c       |   41 +++++++++++++++
 net/rxrpc/peer_object.c      |    1 
 net/rxrpc/rxkad.c            |    8 +--
 net/rxrpc/sendmsg.c          |   56 ++++----------------
 net/rxrpc/sysctl.c           |    2 -
 13 files changed, 390 insertions(+), 130 deletions(-)

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox