Netdev List
 help / color / mirror / Atom feed
* [PATCH iproute2] xfrm: add support of ESN and anti-replay window
From: Nicolas Dichtel @ 2014-10-20  9:23 UTC (permalink / raw)
  To: shemminger; +Cc: netdev, dingzhi, Adrien Mazarguil, Nicolas Dichtel

From: dingzhi <zhi.ding@6wind.com>

This patch allows configuring ESN and the anti-replay window.

Signed-off-by: dingzhi <zhi.ding@6wind.com>
Signed-off-by: Adrien Mazarguil <adrien.mazarguil@6wind.com>
Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 ip/ipxfrm.c     | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 ip/xfrm_state.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 109 insertions(+), 7 deletions(-)

diff --git a/ip/ipxfrm.c b/ip/ipxfrm.c
index f5f78ca6b968..659fa6b64579 100644
--- a/ip/ipxfrm.c
+++ b/ip/ipxfrm.c
@@ -806,6 +806,62 @@ void xfrm_xfrma_print(struct rtattr *tb[], __u16 family,
 		fprintf(fp, "%s", _SL_);
 	}
 
+	if (tb[XFRMA_REPLAY_VAL]) {
+		struct xfrm_replay_state *replay;
+
+		if (prefix)
+			fputs(prefix, fp);
+		fprintf(fp, "anti-replay context: ");
+
+		if (RTA_PAYLOAD(tb[XFRMA_REPLAY_VAL]) < sizeof(*replay)) {
+			fprintf(fp, "(ERROR truncated)");
+			fprintf(fp, "%s", _SL_);
+			return;
+		}
+
+		replay = (struct xfrm_replay_state *)RTA_DATA(tb[XFRMA_REPLAY_VAL]);
+		fprintf(fp, "seq 0x%x, oseq 0x%x, bitmap 0x%08x",
+			replay->seq, replay->oseq, replay->bitmap);
+		fprintf(fp, "%s", _SL_);
+	}
+
+	if (tb[XFRMA_REPLAY_ESN_VAL]) {
+		struct xfrm_replay_state_esn *replay;
+		unsigned int i, j;
+
+		if (prefix)
+			fputs(prefix, fp);
+		fprintf(fp, "anti-replay esn context:");
+
+		if (RTA_PAYLOAD(tb[XFRMA_REPLAY_ESN_VAL]) < sizeof(*replay)) {
+			fprintf(fp, "(ERROR truncated)");
+			fprintf(fp, "%s", _SL_);
+			return;
+		}
+		fprintf(fp, "%s", _SL_);
+
+		replay = (struct xfrm_replay_state_esn *)RTA_DATA(tb[XFRMA_REPLAY_ESN_VAL]);
+		if (prefix)
+			fputs(prefix, fp);
+		fprintf(fp, " seq-hi 0x%x, seq 0x%x, oseq-hi 0x%0x, oseq 0x%0x",
+			replay->seq_hi, replay->seq, replay->oseq_hi,
+			replay->oseq);
+		fprintf(fp, "%s", _SL_);
+		if (prefix)
+			fputs(prefix, fp);
+		fprintf(fp, " replay_window %u, bitmap-length %u",
+			replay->replay_window, replay->bmp_len);
+		for (i = replay->bmp_len, j = 0; i; i--) {
+			if (j++ % 8 == 0) {
+				fprintf(fp, "%s", _SL_);
+				if (prefix)
+					fputs(prefix, fp);
+				fprintf(fp, " ");
+			}
+			fprintf(fp, "%08x ", replay->bmp[i - 1]);
+		}
+		fprintf(fp, "%s", _SL_);
+	}
 }
 
 static int xfrm_selector_iszero(struct xfrm_selector *s)
@@ -849,6 +905,7 @@ void xfrm_state_info_print(struct xfrm_usersa_info *xsinfo,
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_ICMP, "icmp");
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_AF_UNSPEC, "af-unspec");
 		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_ALIGN4, "align4");
+		XFRM_FLAG_PRINT(fp, flags, XFRM_STATE_ESN, "esn");
 		if (flags)
 			fprintf(fp, "%x", flags);
 	}
diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
index fe7708e533f3..2ad3d8d37fbe 100644
--- a/ip/xfrm_state.c
+++ b/ip/xfrm_state.c
@@ -58,6 +58,7 @@ static void usage(void)
 	fprintf(stderr, "Usage: ip xfrm state { add | update } ID [ ALGO-LIST ] [ mode MODE ]\n");
 	fprintf(stderr, "        [ mark MARK [ mask MASK ] ] [ reqid REQID ] [ seq SEQ ]\n");
 	fprintf(stderr, "        [ replay-window SIZE ] [ replay-seq SEQ ] [ replay-oseq SEQ ]\n");
+	fprintf(stderr, "        [ replay-seq-hi SEQ ] [ replay-oseq-hi SEQ ]\n");
 	fprintf(stderr, "        [ flag FLAG-LIST ] [ sel SELECTOR ] [ LIMIT-LIST ] [ encap ENCAP ]\n");
 	fprintf(stderr, "        [ coa ADDR[/PLEN] ] [ ctx CTX ] [ extra-flag EXTRA-FLAG-LIST ]\n");
 	fprintf(stderr, "Usage: ip xfrm state allocspi ID [ mode MODE ] [ mark MARK [ mask MASK ] ]\n");
@@ -87,7 +88,7 @@ static void usage(void)
 	fprintf(stderr, " ALGO-NAME\n");
 	fprintf(stderr, "MODE := transport | tunnel | beet | ro | in_trigger\n");
 	fprintf(stderr, "FLAG-LIST := [ FLAG-LIST ] FLAG\n");
-	fprintf(stderr, "FLAG := noecn | decap-dscp | nopmtudisc | wildrecv | icmp | af-unspec | align4\n");
+	fprintf(stderr, "FLAG := noecn | decap-dscp | nopmtudisc | wildrecv | icmp | af-unspec | align4 | esn\n");
 	fprintf(stderr, "EXTRA-FLAG-LIST := [ EXTRA-FLAG-LIST ] EXTRA-FLAG\n");
 	fprintf(stderr, "EXTRA-FLAG := dont-encap-dscp\n");
 	fprintf(stderr, "SELECTOR := [ src ADDR[/PLEN] ] [ dst ADDR[/PLEN] ] [ dev DEV ] [ UPSPEC ]\n");
@@ -214,6 +215,8 @@ static int xfrm_state_flag_parse(__u8 *flags, int *argcp, char ***argvp)
 				*flags |= XFRM_STATE_AF_UNSPEC;
 			else if (strcmp(*argv, "align4") == 0)
 				*flags |= XFRM_STATE_ALIGN4;
+			else if (strcmp(*argv, "esn") == 0)
+				*flags |= XFRM_STATE_ESN;
 			else {
 				PREV_ARG(); /* back track */
 				break;
@@ -273,6 +276,9 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 		char  			buf[RTA_BUF_SIZE];
 	} req;
 	struct xfrm_replay_state replay;
+	struct xfrm_replay_state_esn replay_esn;
+	__u32 replay_window = 0;
+	__u32 seq = 0, oseq = 0, seq_hi = 0, oseq_hi = 0;
 	char *idp = NULL;
 	char *aeadop = NULL;
 	char *ealgop = NULL;
@@ -289,6 +295,7 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 
 	memset(&req, 0, sizeof(req));
 	memset(&replay, 0, sizeof(replay));
+	memset(&replay_esn, 0, sizeof(replay_esn));
 	memset(&ctx, 0, sizeof(ctx));
 
 	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.xsinfo));
@@ -315,16 +322,24 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 			xfrm_seq_parse(&req.xsinfo.seq, &argc, &argv);
 		} else if (strcmp(*argv, "replay-window") == 0) {
 			NEXT_ARG();
-			if (get_u8(&req.xsinfo.replay_window, *argv, 0))
+			if (get_u32(&replay_window, *argv, 0))
 				invarg("value after \"replay-window\" is invalid", *argv);
 		} else if (strcmp(*argv, "replay-seq") == 0) {
 			NEXT_ARG();
-			if (get_u32(&replay.seq, *argv, 0))
+			if (get_u32(&seq, *argv, 0))
 				invarg("value after \"replay-seq\" is invalid", *argv);
+		} else if (strcmp(*argv, "replay-seq-hi") == 0) {
+			NEXT_ARG();
+			if (get_u32(&seq_hi, *argv, 0))
+				invarg("value after \"replay-seq-hi\" is invalid", *argv);
 		} else if (strcmp(*argv, "replay-oseq") == 0) {
 			NEXT_ARG();
-			if (get_u32(&replay.oseq, *argv, 0))
+			if (get_u32(&oseq, *argv, 0))
 				invarg("value after \"replay-oseq\" is invalid", *argv);
+		} else if (strcmp(*argv, "replay-oseq-hi") == 0) {
+			NEXT_ARG();
+			if (get_u32(&oseq_hi, *argv, 0))
+				invarg("value after \"replay-oseq-hi\" is invalid", *argv);
 		} else if (strcmp(*argv, "flag") == 0) {
 			NEXT_ARG();
 			xfrm_state_flag_parse(&req.xsinfo.flags, &argc, &argv);
@@ -514,9 +529,39 @@ static int xfrm_state_modify(int cmd, unsigned flags, int argc, char **argv)
 		argc--; argv++;
 	}
 
-	if (replay.seq || replay.oseq)
-		addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_VAL,
-			  (void *)&replay, sizeof(replay));
+	if (req.xsinfo.flags & XFRM_STATE_ESN &&
+	    replay_window == 0) {
+		fprintf(stderr, "Error: esn flag set without replay-window.\n");
+		exit(-1);
+	}
+
+	if (replay_window > XFRMA_REPLAY_ESN_MAX) {
+		fprintf(stderr,
+			"Error: replay-window (%u) > XFRMA_REPLAY_ESN_MAX (%u).\n",
+			replay_window, XFRMA_REPLAY_ESN_MAX);
+		exit(-1);
+	}
+
+	if (req.xsinfo.flags & XFRM_STATE_ESN ||
+	    replay_window > (sizeof(replay.bitmap) * 8)) {
+		replay_esn.seq = seq;
+		replay_esn.oseq = oseq;
+		replay_esn.seq_hi = seq_hi;
+		replay_esn.oseq_hi = oseq_hi;
+		replay_esn.replay_window = replay_window;
+		replay_esn.bmp_len = (replay_window + sizeof(__u32) * 8 - 1) /
+				     (sizeof(__u32) * 8);
+		addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_ESN_VAL,
+			  &replay_esn, sizeof(replay_esn));
+	} else {
+		if (seq || oseq) {
+			replay.seq = seq;
+			replay.oseq = oseq;
+			addattr_l(&req.n, sizeof(req.buf), XFRMA_REPLAY_VAL,
+				  &replay, sizeof(replay));
+		}
+		req.xsinfo.replay_window = replay_window;
+	}
 
 	if (extra_flags)
 		addattr32(&req.n, sizeof(req.buf), XFRMA_SA_EXTRA_FLAGS,
-- 
2.1.0

^ permalink raw reply related

* Re: v3.18-rc1 bloat-o-meter
From: Borislav Petkov @ 2014-10-20  9:16 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: Linux Embedded, Josh Triplett, Linux Kernel Development,
	netdev@vger.kernel.org
In-Reply-To: <CAMuHMdV5VPEqJ2WeZTc=Tg_vSRJPYgzQib9m-_fnqLzJ_vpDdA@mail.gmail.com>

On Mon, Oct 20, 2014 at 11:09:28AM +0200, Geert Uytterhoeven wrote:
> Sure. Compile two kernel images, and run
> 
>     scripts/bloat-o-meter <image1> <image2>

Ha, that's even upstream :-)

Thanks Geert!

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply

* [PATCH] net: Remove trailing whitespace in tcp.h icmp.c syncookies.c
From: Kenjiro Nakayama @ 2014-10-20  9:15 UTC (permalink / raw)
  To: linux-kernel; +Cc: netdev, Kenjiro Nakayama

Remove trailing whitespace in tcp.h icmp.c syncookies.c

Signed-off-by: Kenjiro Nakayama <nakayamakenjiro@gmail.com>
---
 include/net/tcp.h     | 12 ++++++------
 net/ipv6/icmp.c       |  1 -
 net/ipv6/syncookies.c |  1 -
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 4062b4f..c73fc14 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -55,9 +55,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_HEADER	(128 + MAX_HEADER)
 #define MAX_TCP_OPTION_SPACE 40
 
-/* 
+/*
  * Never offer a window over 32767 without using window scaling. Some
- * poor stacks do signed 16bit maths! 
+ * poor stacks do signed 16bit maths!
  */
 #define MAX_TCP_WINDOW		32767U
 
@@ -167,7 +167,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 /*
  *	TCP option
  */
- 
+
 #define TCPOPT_NOP		1	/* Padding */
 #define TCPOPT_EOL		0	/* End of options */
 #define TCPOPT_MSS		2	/* Segment size negotiating */
@@ -1104,16 +1104,16 @@ static inline int tcp_win_from_space(int space)
 		space - (space>>sysctl_tcp_adv_win_scale);
 }
 
-/* Note: caller must be prepared to deal with negative returns */ 
+/* Note: caller must be prepared to deal with negative returns */
 static inline int tcp_space(const struct sock *sk)
 {
 	return tcp_win_from_space(sk->sk_rcvbuf -
 				  atomic_read(&sk->sk_rmem_alloc));
-} 
+}
 
 static inline int tcp_full_space(const struct sock *sk)
 {
-	return tcp_win_from_space(sk->sk_rcvbuf); 
+	return tcp_win_from_space(sk->sk_rcvbuf);
 }
 
 static inline void tcp_openreq_init(struct request_sock *req,
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 97ae700..62c1037 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -1009,4 +1009,3 @@ struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
 	return table;
 }
 #endif
-
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f25cb6..0e26e79 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -269,4 +269,3 @@ out_free:
 	reqsk_free(req);
 	return NULL;
 }
-
-- 
1.9.3

^ permalink raw reply related

* Re: v3.18-rc1 bloat-o-meter
From: Geert Uytterhoeven @ 2014-10-20  9:09 UTC (permalink / raw)
  To: Borislav Petkov
  Cc: Linux Embedded, Josh Triplett, Linux Kernel Development,
	netdev@vger.kernel.org
In-Reply-To: <20141020090637.GA3654@pd.tnic>

Hi Boris,

On Mon, Oct 20, 2014 at 11:06 AM, Borislav Petkov <bp@alien8.de> wrote:
> On Mon, Oct 20, 2014 at 10:37:19AM +0200, Geert Uytterhoeven wrote:
>>       Hi all,
>>
>> Below is the bloat-o-meter output when comparing an m68k/atari_defconfig
>> kernel for v3.17 and v3.18-rc1.
>
> That looks cool, can I run this for other arches too?

Sure. Compile two kernel images, and run

    scripts/bloat-o-meter <image1> <image2>

Gr{oetje,eeting}s,

                        Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
                                -- Linus Torvalds

^ permalink raw reply

* Re: v3.18-rc1 bloat-o-meter
From: Borislav Petkov @ 2014-10-20  9:06 UTC (permalink / raw)
  To: Geert Uytterhoeven
  Cc: linux-embedded, Josh Triplett, Linux Kernel Development, netdev
In-Reply-To: <alpine.DEB.2.02.1410201018230.24954@ayla.of.borg>

On Mon, Oct 20, 2014 at 10:37:19AM +0200, Geert Uytterhoeven wrote:
> 	Hi all,
> 
> Below is the bloat-o-meter output when comparing an m68k/atari_defconfig
> kernel for v3.17 and v3.18-rc1.

That looks cool, can I run this for other arches too?

-- 
Regards/Gruss,
    Boris.

Sent from a fat crate under my desk. Formatting is fine.
--

^ permalink raw reply

* [PATCH 2/2] xfrm6: fix a potential use after free in xfrm6_policy.c
From: roy.qing.li @ 2014-10-20  8:49 UTC (permalink / raw)
  To: netdev; +Cc: steffen.klassert
In-Reply-To: <1413794954-16967-1-git-send-email-roy.qing.li@gmail.com>

From: Li RongQing <roy.qing.li@gmail.com>

pskb_may_pull() may change skb->data and make the nh and exthdr pointers
obsolete, so recompute nh and exthdr

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 net/ipv6/xfrm6_policy.c |   11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index ac49f84..115fd3b 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -170,8 +170,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 		case IPPROTO_DCCP:
 			if (!onlyproto && (nh + offset + 4 < skb->data ||
 			     pskb_may_pull(skb, nh + offset + 4 - skb->data))) {
-				__be16 *ports = (__be16 *)exthdr;
+				__be16 *ports;
 
+				nh = skb_network_header(skb);
+				ports = (__be16*)(nh + offset);
 				fl6->fl6_sport = ports[!!reverse];
 				fl6->fl6_dport = ports[!reverse];
 			}
@@ -180,8 +182,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_ICMPV6:
 			if (!onlyproto && pskb_may_pull(skb, nh + offset + 2 - skb->data)) {
-				u8 *icmp = (u8 *)exthdr;
+				u8 *icmp;
 
+				nh = skb_network_header(skb);
+				icmp = (u8*)(nh + offset);
 				fl6->fl6_icmp_type = icmp[0];
 				fl6->fl6_icmp_code = icmp[1];
 			}
@@ -192,8 +196,9 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 		case IPPROTO_MH:
 			if (!onlyproto && pskb_may_pull(skb, nh + offset + 3 - skb->data)) {
 				struct ip6_mh *mh;
-				mh = (struct ip6_mh *)exthdr;
 
+				nh = skb_network_header(skb);
+				mh = (struct ip6_mh*)(nh + offset);
 				fl6->fl6_mh_type = mh->ip6mh_type;
 			}
 			fl6->flowi6_proto = nexthdr;
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 1/2] xfrm: fix a potential use after free in xfrm4_policy.c
From: roy.qing.li @ 2014-10-20  8:49 UTC (permalink / raw)
  To: netdev; +Cc: steffen.klassert

From: Li RongQing <roy.qing.li@gmail.com>

pskb_may_pull() may change skb->data and make the xprth pointer obsolete,
so recompute xprth

Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 net/ipv4/xfrm4_policy.c |   19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6156f68..a4d8177 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -98,11 +98,14 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
 	return 0;
 }
 
+#define NEXT_HEAD(skb) (skb_network_header(skb) + ihl)
+
 static void
 _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	u8 *xprth = skb_network_header(skb) + iph->ihl * 4;
+	int ihl = iph->ihl * 4;
+	u8 *xprth = NEXT_HEAD(skb);
 	struct flowi4 *fl4 = &fl->u.ip4;
 	int oif = 0;
 
@@ -122,7 +125,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 		case IPPROTO_DCCP:
 			if (xprth + 4 < skb->data ||
 			    pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ports = (__be16 *)xprth;
+				__be16 *ports = (__be16 *)NEXT_HEAD(skb);
 
 				fl4->fl4_sport = ports[!!reverse];
 				fl4->fl4_dport = ports[!reverse];
@@ -131,7 +134,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_ICMP:
 			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
-				u8 *icmp = xprth;
+				u8 *icmp = NEXT_HEAD(skb);
 
 				fl4->fl4_icmp_type = icmp[0];
 				fl4->fl4_icmp_code = icmp[1];
@@ -140,7 +143,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_ESP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be32 *ehdr = (__be32 *)xprth;
+				__be32 *ehdr = (__be32 *)NEXT_HEAD(skb);
 
 				fl4->fl4_ipsec_spi = ehdr[0];
 			}
@@ -148,7 +151,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_AH:
 			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
-				__be32 *ah_hdr = (__be32 *)xprth;
+				__be32 *ah_hdr = (__be32 *)NEXT_HEAD(skb);
 
 				fl4->fl4_ipsec_spi = ah_hdr[1];
 			}
@@ -156,7 +159,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_COMP:
 			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
-				__be16 *ipcomp_hdr = (__be16 *)xprth;
+				__be16 *ipcomp_hdr = (__be16 *)NEXT_HEAD(skb);
 
 				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));
 			}
@@ -164,8 +167,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)
 
 		case IPPROTO_GRE:
 			if (pskb_may_pull(skb, xprth + 12 - skb->data)) {
-				__be16 *greflags = (__be16 *)xprth;
-				__be32 *gre_hdr = (__be32 *)xprth;
+				__be16 *greflags = (__be16 *)NEXT_HEAD(skb);
+				__be32 *gre_hdr = (__be32 *)NEXT_HEAD(skb);
 
 				if (greflags[0] & GRE_KEY) {
 					if (greflags[0] & GRE_CSUM)
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH] Documentation: ptp: Fix build failure on MIPS cross builds
From: Markos Chandras @ 2014-10-20  8:42 UTC (permalink / raw)
  To: linux-mips
  Cc: Markos Chandras, Richard Cochran, Jonathan Corbet, netdev,
	linux-doc, linux-kernel

The MIPS system calls are defined based on the -mabi gcc option.
However, testptp is built on the host using the unistd header from
the kernel sources, which were built for the MIPS architecture, so the
syscall numbers are guarded by the __MIPS_SIM_{ABI64, ABI32, NABI32}
definitions, leading to the following build problem:

Documentation/ptp/testptp.c: In function 'clock_adjtime':
Documentation/ptp/testptp.c:55: error: '__NR_clock_adjtime'
undeclared (first use in this function)
Documentation/ptp/testptp.c:55: error: (Each undeclared identifier is reported
only once Documentation/ptp/testptp.c:55: error: for each function it appears in.)

This fix is similar to e9107f88c985bcda ("samples/seccomp/Makefile: do not build
tests if cross-compiling for MIPS")

Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: netdev@vger.kernel.org
Cc: linux-doc@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Markos Chandras <markos.chandras@imgtec.com>
---
 Documentation/ptp/Makefile | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/ptp/Makefile b/Documentation/ptp/Makefile
index 293d6c09a11f..397c1cd2eda7 100644
--- a/Documentation/ptp/Makefile
+++ b/Documentation/ptp/Makefile
@@ -1,5 +1,15 @@
 # List of programs to build
+ifndef CROSS_COMPILE
 hostprogs-y := testptp
+else
+# MIPS system calls are defined based on the -mabi that is passed
+# to the toolchain which may or may not be a valid option
+# for the host toolchain. So disable testptp if target architecture
+# is MIPS but the host isn't.
+ifndef CONFIG_MIPS
+hostprogs-y := testptp
+endif
+endif
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
-- 
2.1.2


^ permalink raw reply related

* v3.18-rc1 bloat-o-meter
From: Geert Uytterhoeven @ 2014-10-20  8:37 UTC (permalink / raw)
  To: linux-embedded; +Cc: Josh Triplett, Linux Kernel Development, netdev

	Hi all,

Below is the bloat-o-meter output when comparing an m68k/atari_defconfig
kernel for v3.17 and v3.18-rc1.

Major culprit seems to be bpf. Can this become modular or optional?
Currently it's always included if CONFIG_NET=y.

Thanks!

add/remove: 374/146 grow/shrink: 392/323 up/down: 57311/-24659 (32652)
function                                     old     new   delta
do_check                                       -    4402   +4402
bpf_check                                      -    1976   +1976
sys_bpf                                        -    1238   +1238
SyS_bpf                                        -    1238   +1238
xfrm_hash_resize                            1530    2602   +1072
__skb_flow_dissect                             -    1018   +1018
pcpu_balance_workfn                            -     954    +954
policy_hash_direct                           118    1034    +916
policy_hash_bysel                            178    1094    +916
check_mem_access                               -     834    +834
udp6_gro_receive                               -     802    +802
__ip_options_echo                              -     802    +802
validate_xmit_skb                              -     746    +746
bpf_prog_load                                  -     670    +670
udp4_gro_receive                               -     634    +634
load_elf_binary                             2128    2698    +570
string_escape_mem                              -     568    +568
__bioset_create                                -     556    +556
pcpu_create_chunk                              -     432    +432
xfrm_hash_rebuild                              -     424    +424
bt_for_each                                    -     416    +416
iov_iter_zero                                  -     394    +394
pcpu_next_unpop                                -     390    +390
pcpu_next_pop                                  -     388    +388
escaped_string                                 -     364    +364
tcp6_gso_segment                               -     362    +362
__percpu_ref_switch_to_atomic                  -     344    +344
alloc_skb_with_frags                           -     342    +342
check_func_arg                                 -     340    +340
nlmsvc_lock                                  410     736    +326
guard_bio_eod                                  -     306    +306
vm_pgprot_modify                               -     304    +304
udp6_ufo_fragment                            470     768    +298
sd_init_command                             3596    3892    +296
nfs_volume_list_show                           -     296    +296
pcpu_alloc_area                              502     790    +288
map_lookup_elem                                -     280    +280
push_insn                                      -     276    +276
tcp6_gro_receive                             240     510    +270
devkmsg_write                                  -     258    +258
is_state_visited                               -     254    +254
t10_pi_verify                                  -     252    +252
dma_common_contiguous_remap                    -     250    +250
copy_to_iter                                   -     246    +246
copy_from_iter                                 -     246    +246
bio_integrity_process                          -     244    +244
generic_setlease                             580     822    +242
find_mergeable                                 -     238    +238
print_verifier_state                           -     230    +230
skb_udp_tunnel_segment                       716     944    +228
dns_resolver_cmp                               -     224    +224
copy_from_iter_bvec                            -     220    +220
copy_to_iter_bvec                              -     218    +218
tcp4_gro_receive                             198     400    +202
__kmalloc_track_caller                         -     202    +202
check_stack_boundary                           -     200    +200
__percpu_ref_switch_to_percpu                  -     200    +200
do_mount                                    2062    2260    +198
tcp_recvmsg                                 2118    2308    +190
__udp4_lib_rcv                              2128    2314    +186
sys_cacheflush                              1136    1318    +182
ethtool_set_tunable                            -     180    +180
ethtool_get_tunable                            -     180    +180
tty_send_xchar                                 -     178    +178
icmpv4_xrlim_allow                             -     176    +176
__skb_flow_get_ports                           -     176    +176
packet_sendmsg                              3066    3240    +174
nlm_end_grace_write                            -     170    +170
path_setxattr                                  -     168    +168
scsi_probe_and_add_lun                      2224    2388    +164
bpf_prog_realloc                               -     164    +164
tcp_ecn_check_ce                               -     162    +162
skb_entail                                     -     162    +162
dump_flags                                     -     162    +162
pcpu_count_occupied_pages                      -     160    +160
dev_vprintk_emit                             352     506    +154
tcp4_gso_segment                               -     150    +150
t10_pi_generate                                -     150    +150
path_removexattr                               -     150    +150
nlm_end_grace_read                             -     150    +150
d_invalidate                                 106     256    +150
udp6_gro_complete                              -     146    +146
release_task                                 896    1042    +146
nfs_init_commit                              170     316    +146
find_exception                                86     232    +146
bpf_prog_alloc                                 -     146    +146
__skb_complete_tx_timestamp                    -     146    +146
icmp_global_allow                              -     142    +142
blk_integrity_merge_rq                       104     246    +142
sd_dif_complete                              464     604    +140
blk_alloc_flush_queue                          -     140    +140
update_or_create_fnhe                        394     532    +138
sock_dequeue_err_skb                           -     132    +132
ethtool_set_one_feature                        -     132    +132
__skb_gro_checksum_complete                    -     132    +132
skb_clone_sk                                   -     130    +130
path_getxattr                                  -     130    +130
thread_group_cputime                         114     242    +128
tcp_write_timer_handler                      340     468    +128
dev_gro_receive                              740     868    +128
__is_local_mountpoint                          -     128    +128
__dev_get_by_flags                             -     128    +128
sd_dif_prepare                               352     478    +126
blk_mq_end_request                             -     126    +126
blk_integrity_merge_bio                       74     200    +126
wait_on_page_bit_killable_timeout              -     124    +124
path_listxattr                                 -     124    +124
pcpu_alloc                                   982    1104    +122
pagemap_pte_range                            472     594    +122
devm_request_resource                          -     122    +122
__bpf_prog_run                              6124    6246    +122
cleanup_mnt                                    -     118    +118
region_is_ram                                  -     116    +116
lockd_end_grace_operations                     -     116    +116
bpf_prog_fops                                  -     116    +116
bpf_map_fops                                   -     116    +116
task_of_stack                                  -     114    +114
out_of_line_wait_on_bit_timeout                -     114    +114
gnet_stats_copy_queue                         98     212    +114
check_reg_arg                                  -     114    +114
eth_get_headlen                                -     112    +112
udp4_gro_complete                              -     110    +110
grace_exit_net                                 -     110    +110
read_iter_zero                                 -     108    +108
locks_copy_conflock                            -     108    +108
bpf_prog_get                                   -     108    +108
tcp_send_ack                                 210     316    +106
proc_mem_open                                  -     106    +106
nfs_volume_list_start                          -     106    +106
__blk_drain_queue                            276     382    +106
nfs_volume_list_next                           -     104    +104
icmp_rcv                                     746     850    +104
blk_mq_init_queue                           1696    1800    +104
xfrm_net_init                                436     538    +102
__qdisc_run                                  254     356    +102
percpu_ref_switch_to_atomic_rcu                -     100    +100
dma_pool_create                              336     436    +100
n_tty_ioctl_helper                           218     314     +96
dma_common_free_remap                          -      96     +96
__detach_mounts                                -      96     +96
dma_common_pages_remap                         -      94     +94
__gnet_stats_copy_basic                        -      94     +94
pop_stack                                      -      92     +92
percpu_ref_call_confirm_rcu                    -      92     +92
pcpu_setup_first_chunk                      2312    2404     +92
tcp_assign_congestion_control                  -      90     +90
lookup_mountpoint                              -      90     +90
grace_init_net                                 -      90     +90
udp4_ufo_fragment                            282     370     +88
blk_mq_check_expired                           -      88     +88
bit_wait_timeout                               -      88     +88
bit_wait_io_timeout                            -      88     +88
tcp_v4_rcv                                  1616    1702     +86
tcp_ack                                     3184    3270     +86
raw_sendmsg                                 2002    2088     +86
udp6_csum_init                               498     582     +84
skb_get_poff                                   -      84     +84
lockd_create_procfs                            -      84     +84
__dev_queue_xmit                            1010    1094     +84
verbose                                        -      82     +82
pty_stop                                       -      82     +82
pty_start                                      -      82     +82
__get_hash_thresh                              -      82     +82
validate_xmit_skb_list                         -      80     +80
nfs_volume_list_stop                           -      80     +80
bio_integrity_advance                        230     310     +80
skb_complete_tx_timestamp                      -      78     +78
sys_prctl                                   1446    1522     +76
do_setlink                                  2016    2092     +76
add_partition                                868     944     +76
__blk_mq_end_request                           -      76     +76
SyS_prctl                                   1446    1522     +76
tcp_v4_err                                  1300    1374     +74
sch_direct_xmit                              360     434     +74
pcpu_need_to_extend                           38     112     +74
blk_mq_exit_hw_queues                        136     210     +74
bin2hex                                        -      74     +74
test_ctx_access                                -      72     +72
setup_cpu_cache                              630     702     +72
lease_setup                                    -      72     +72
__mutex_lock_killable_slowpath               190     262     +72
__mutex_lock_interruptible_slowpath          180     252     +72
nfs_release_page                              72     142     +70
any_leases_conflict                            -      70     +70
__mutex_lock_slowpath                        148     218     +70
__d_free_external                              -      70     +70
vma_set_page_prot                              -      68     +68
show_map_vma                                 622     690     +68
pcpu_map_extend_workfn                         -      68     +68
m_cache_vma                                    -      68     +68
blk_mq_rq_timed_out                           46     114     +68
free_used_maps                                 -      66     +66
bpf_map_put                                    -      66     +66
blk_mq_queue_enter                           244     310     +66
alloc_kmem_cache_cpus                          -      66     +66
__kmem_cache_alias                             -      66     +66
lease_alloc                                   62     126     +64
integrity_verify_store                         -      64     +64
integrity_generate_store                       -      64     +64
do_coredump                                 2878    2942     +64
bpf_map_get                                    -      64     +64
vfs_rename                                  1544    1604     +60
test_map_alloc                                 -      60     +60
proc_map_release                               -      60     +60
percpu_ref_init                               50     110     +60
__sock_tx_timestamp                            -      60     +60
udp_sendmsg                                 1690    1748     +58
packet_sendmsg_spkt                          604     662     +58
nfs_commit_release_pages                     260     318     +58
gen_new_estimator                            450     508     +58
__start_tty                                    -      58     +58
delayed_mntput                                 -      56     +56
bpf_alu_string                                 -      56     +56
blk_rq_timed_out_timer                       170     226     +56
target_attribute_is_visible                  800     854     +54
t10_pi_type3_ip                                -      54     +54
t10_pi_type3_crc                               -      54     +54
t10_pi_type1_ip                                -      54     +54
t10_pi_type1_crc                               -      54     +54
memzero_page                                   -      54     +54
irq_may_run                                    -      54     +54
devm_release_resource                          -      54     +54
bio_alloc_bioset                             370     424     +54
test_is_valid_access                           -      52     +52
pcpu_chunk_populated                           -      52     +52
init_net                                    1924    1976     +52
delayed_mntput_work                            -      52     +52
bpf_prog_free                                 14      66     +52
bio_integrity_prep                           466     518     +52
__dl_clear_params                              -      52     +52
tcp_create_openreq_child                     874     924     +50
show_state_filter                            116     166     +50
mark_reg_unknown_value                         -      50     +50
get_request                                 1104    1154     +50
vfs_setlease                                  20      68     +48
tcp_xmit_probe_skb                           128     176     +48
t10_pi_ip_fn                                   -      48     +48
slab_unmergeable                               -      48     +48
param_check_unsafe                             -      48     +48
netlink_release                              988    1036     +48
netif_wake_subqueue                            -      48     +48
est_timer                                    570     618     +48
cputime_advance                                -      48     +48
bio_integrity_free                           108     156     +48
tcp_make_synack                              692     738     +46
lzo1x_decompress_safe                        892     938     +46
vfs_unlink                                   254     298     +44
request_key_and_link                        1040    1084     +44
percpu_ref_kill_and_confirm                   88     132     +44
pcpu_free_area                               348     392     +44
fcntl_getlease                                80     124     +44
ethtool_tunable_valid                          -      44     +44
sys_umount                                   830     872     +42
sock_kfree_s                                  34      76     +42
fib_validate_source                          602     644     +42
detach_and_collect                             -      42     +42
dentry_free                                   40      82     +42
blk_peek_request                             392     434     +42
__netdev_alloc_frag                          284     326     +42
SyS_umount                                   830     872     +42
xs_tcp_setup_socket                          842     882     +40
vfs_rmdir                                    234     274     +40
pcpu_schedule_balance_work                     -      40     +40
key_default_cmp                                -      40     +40
ip_send_unicast_reply                        600     640     +40
flush_end_io                                 344     384     +40
flush_data_end_io                             46      86     +40
clear_refs_write                             514     554     +40
bpf_prog_put                                   -      40     +40
bpf_jmp_string                                 -      40     +40
tcp_connect                                 1882    1920     +38
irq_work_tick                                  -      38     +38
blk_mq_tag_to_rq                              68     106     +38
bio_integrity_add_page                        80     118     +38
__stop_tty                                     -      38     +38
reg_type_str                                   -      36     +36
ping_v4_sendmsg                             1130    1166     +36
mq_flush_data_end_io                          84     120     +36
kmem_cache_create                            344     380     +36
ipv6_find_hdr                                678     714     +36
integrity_verify_show                          -      36     +36
gnet_stats_copy_basic                        126     162     +36
dma_pool_destroy                             344     380     +36
blk_free_flush_queue                           -      36     +36
xfrm_policy_hash_rebuild                       -      34     +34
sock_efree                                     -      34     +34
nsm_mon_unmon                                158     192     +34
netif_tx_wake_queue                            -      34     +34
m_next_vma                                     -      34     +34
ipv4_table                                  2210    2244     +34
ipv4_net_table                               510     544     +34
integrity_generate_show                        -      34     +34
integrity_device_show                          -      34     +34
__kstrtab_wait_on_page_bit_killable_timeout       -      34     +34
xs_udp_send_request                          118     150     +32
sys_flock                                    318     350     +32
skb_complete_wifi_ack                        114     146     +32
rt_mutex_setprio                             562     594     +32
put_mountpoint                                70     102     +32
percpu_ref_switch_to_percpu                    -      32     +32
ops                                           56      88     +32
nfs_inode_remove_request                     146     178     +32
ll_front_merge_fn                            384     416     +32
device_del                                   330     362     +32
cdrom_newpc_intr                            2266    2298     +32
bpf_class_string                               -      32     +32
blk_stack_limits                             970    1002     +32
bio_integrity_endio                          116     148     +32
bh_lrus                                       32      64     +32
__ww_mutex_lock_interruptible_slowpath       308     340     +32
__kstrtab_out_of_line_wait_on_bit_timeout       -      32     +32
SyS_flock                                    318     350     +32
test_funcs                                     -      30     +30
svc_recvfrom                                  84     114     +30
register_test_ops                              -      30     +30
ll_back_merge_fn                             380     410     +30
init_arraycache                                -      30     +30
bpf_register_prog_type                         -      30     +30
bpf_register_map_type                          -      30     +30
bio_integrity_trim                            78     108     +30
__ww_mutex_lock_slowpath                     274     304     +30
__bpf_prog_free                                -      30     +30
xs_tcp_send_request                          198     226     +28
tcp_wfree                                    134     162     +28
sd_dif_config_host                           492     520     +28
scsi_sysfs_device_initialize                 296     324     +28
rpc_create_xprt                              168     196     +28
qdisc_rcu_free                                24      52     +28
nfs_setattr                                  346     374     +28
nfs_generic_pg_test                           80     108     +28
ip_setsockopt                               3508    3536     +28
grace_net_ops                                  -      28     +28
flag_mask                                      -      28     +28
do_kernel_restart                              -      28     +28
cputime_adjust                               330     358     +28
blkdev_readpages                               -      28     +28
__kstrtab___skb_gro_checksum_complete          -      28     +28
__kstrtab_unregister_restart_handler           -      27     +27
try_to_wake_up                               146     172     +26
tcp_init                                     552     578     +26
tcp_ecn_queue_cwr                              -      26     +26
task_rq_lock                                  28      54     +26
sys_semtimedop                              1760    1786     +26
scsi_timeout                                   -      26     +26
release_pages                                544     570     +26
pointer                                     1092    1118     +26
netif_schedule_queue                           -      26     +26
init_nlm                                      66      92     +26
detach_mnt                                   100     126     +26
blk_abort_request                             38      64     +26
__kstrtab_skb_complete_tx_timestamp            -      26     +26
__alloc_percpu_gfp                             -      26     +26
SyS_semtimedop                              1760    1786     +26
__kstrtab_xfrm_policy_hash_rebuild             -      25     +25
__kstrtab_register_restart_handler             -      25     +25
test_map_ops                                   -      24     +24
tcp_conn_request                            1440    1464     +24
t10_pi_crc_fn                                  -      24     +24
super_cache_scan                             392     416     +24
strncasecmp                                  106     130     +24
sg_scsi_ioctl                                554     578     +24
sg_io                                        886     910     +24
percpu_ref_switch_to_atomic                    -      24     +24
netif_skb_features                           364     388     +24
mnt_set_mountpoint                            62      86     +24
caller_saved                                   -      24     +24
bpf_map_free_deferred                          -      24     +24
__kstrtab___gnet_stats_copy_basic              -      24     +24
__kstrtab___kmalloc_track_caller               -      23     +23
xs_local_send_request                        128     150     +22
tcp_send_delayed_ack                         190     212     +22
t10_pi_type3_verify_ip                         -      22     +22
t10_pi_type3_verify_crc                        -      22     +22
t10_pi_type3_generate_ip                       -      22     +22
t10_pi_type3_generate_crc                      -      22     +22
t10_pi_type1_verify_ip                         -      22     +22
t10_pi_type1_verify_crc                        -      22     +22
t10_pi_type1_generate_ip                       -      22     +22
t10_pi_type1_generate_crc                      -      22     +22
single_task_running                            -      22     +22
search_binary_handler                        312     334     +22
proc_reg_get_unmapped_area                   144     166     +22
percpu_ref_reinit                            100     122     +22
mpage_bio_submit                              32      54     +22
locks_release_private                         40      62     +22
dump_page_badflags                           114     136     +22
cache_type_store                             416     438     +22
blk_register_queue                           210     232     +22
blk_mq_finish_init                             -      22     +22
bioset_create_nobvec                           -      22     +22
bio_split                                    158     180     +22
bio_advance                                  230     252     +22
_request_firmware                           1400    1422     +22
__kstrtab_devm_request_resource                -      22     +22
__kstrtab_devm_release_resource                -      22     +22
__bio_free                                    24      46     +22
__kstrtab_sock_dequeue_err_skb                 -      21     +21
__kstrtab_nfs_put_lock_context                 -      21     +21
__kstrtab_nfs_get_lock_context                 -      21     +21
__kstrtab_netif_schedule_queue                 -      21     +21
__kstrtab_blk_mq_start_request                 -      21     +21
__kstrtab_bioset_create_nobvec                 -      21     +21
__kstrtab_alloc_skb_with_frags                 -      21     +21
__kstrtab___skb_flow_get_ports                 -      21     +21
__kstrtab___blk_mq_end_request                 -      21     +21
xfrm_policy_flush                            292     312     +20
unregister_restart_handler                     -      20     +20
tcp_transmit_skb                            2234    2254     +20
sys_pivot_root                               602     622     +20
stable_page_flags                           1018    1038     +20
register_restart_handler                       -      20     +20
free_module                                  382     402     +20
do_acct_process                             1156    1176     +20
dns_resolver_match_preparse                    -      20     +20
bpf_prog_release                               -      20     +20
bpf_map_release                                -      20     +20
bio_integrity_enabled                        112     132     +20
bio_integrity_alloc                          236     256     +20
bio_clone_fast                                94     114     +20
bio_clone_bioset                             718     738     +20
__kstrtab_single_task_running                  -      20     +20
__kstrtab_netif_wake_subqueue                  -      20     +20
__kstrtab_netif_tx_wake_queue                  -      20     +20
__kstrtab_locks_copy_conflock                  -      20     +20
__kstrtab_bit_wait_io_timeout                  -      20     +20
__kstrtab___sock_tx_timestamp                  -      20     +20
SyS_pivot_root                               602     622     +20
__kstrtab_blk_mq_end_request                   -      19     +19
__kstrtab___skb_flow_dissect                   -      19     +19
__kstrtab___dev_get_by_flags                   -      19     +19
__kstrtab___alloc_percpu_gfp                   -      19     +19
test_func_proto                                -      18     +18
set_task_stack_end_magic                       -      18     +18
scsi_queue_rq                               1294    1312     +18
rhashtable_init                              246     264     +18
lease_get_mtime                               70      88     +18
fsnotify_put_group                            28      46     +18
fcntl_getlk64                                318     336     +18
devm_resource_match                            -      18     +18
call_transmit_status                         182     200     +18
bpf_prog_free_deferred                         -      18     +18
blk_mq_free_bitmap                             -      18     +18
bio_integrity_clone                          138     156     +18
__kstrtab_string_escape_mem                    -      18     +18
__kstrtab_simple_nosetlease                    -      18     +18
__kstrtab_icmp_global_allow                    -      18     +18
__kstrtab_t10_pi_type3_crc                     -      17     +17
__kstrtab_t10_pi_type1_crc                     -      17     +17
__kstrtab_bpf_prog_realloc                     -      17     +17
__kstrtab_bit_wait_timeout                     -      17     +17
wake_up_new_task                              84     100     +16
tl_prog                                        -      16     +16
tl_map                                         -      16     +16
tcp_tx_timestamp                              66      82     +16
sd_read_block_characteristics                100     116     +16
pcpu_balance_work                              -      16     +16
nfs_volume_list_ops                            -      16     +16
ipv6_skip_exthdr                             276     292     +16
init_grace                                     -      16     +16
elv_requeue_request                          144     160     +16
elv_completed_request                        120     136     +16
devm_resource_release                          -      16     +16
bpf_ldst_string                                -      16     +16
arp_rcv                                      312     328     +16
__kstrtab_t10_pi_type3_ip                      -      16     +16
__kstrtab_t10_pi_type1_ip                      -      16     +16
__kstrtab_kmem_cache_size                      -      16     +16
__kstrtab_eth_get_headlen                      -      16     +16
__kstrtab___bpf_prog_free                      -      16     +16
__cleanup_mnt                                  -      16     +16
__kstrtab_page_waitqueue                       -      15     +15
__kstrtab_inet6_offloads                       -      15     +15
__kstrtab_copy_from_iter                       -      15     +15
__kstrtab_bpf_prog_alloc                       -      15     +15
__kstrtab___kernel_write                       -      15     +15
wb_priority                                   36      50     +14
udp_gro_complete                              84      98     +14
test_map_free                                  -      14     +14
sys_io_setup                                1894    1908     +14
percpu_count_ptr                               -      14     +14
m_start                                      456     470     +14
keyring_search                               126     140     +14
integrity_verify_entry                         -      14     +14
integrity_generate_entry                       -      14     +14
integrity_device_entry                         -      14     +14
fcntl_getlk                                  370     384     +14
blk_recount_segments                          78      92     +14
bio_integrity_verify_fn                       48      62     +14
__rpc_clone_client                           152     166     +14
__kstrtab_iov_iter_zero                        -      14     +14
__kstrtab_inet_offloads                        -      14     +14
SyS_io_setup                                1894    1908     +14
__setup_str_setup_slab_nomerge                 -      13     +13
__kstrtab_tcp_send_ack                         -      13     +13
__kstrtab_skb_clone_sk                         -      13     +13
__kstrtab_copy_to_iter                         -      13     +13
xfrm_policy_insert                           776     788     +12
xfrm_policy_fini                             274     286     +12
time_out_leases                              132     144     +12
tcp_prequeue                                 550     562     +12
start_kernel                                1070    1082     +12
sd_revalidate_disk                          4330    4342     +12
scsi_mq_uninit_cmd                            86      98     +12
scsi_error_handler                          1066    1078     +12
pools_reg_lock                                 -      12     +12
percpu_enable_async                            -      12     +12
nlmsvc_lock_operations                        24      36     +12
lease_manager_ops                             24      36     +12
do_read_fault                                506     518     +12
bpf_verifier_lock                              -      12     +12
__xfrm_policy_link                           214     226     +12
__setup_setup_slab_nomerge                     -      12     +12
__break_lease                                646     658     +12
__kstrtab_sock_efree                           -      11     +11
xs_error_report                               68      78     +10
switched_from_dl                              38      48     +10
setup_slab_nomerge                             -      10     +10
scsi_init_io                                 442     452     +10
kmem_cache_size                                -      10     +10
key_get_type_from_user                        56      66     +10
ipv6_gso_pull_exthdrs                        220     230     +10
ide_disk_setup                               984     994     +10
alloc_vfsmnt                                 260     270     +10
__skb_get_hash                               340     350     +10
__sched_setscheduler                        1496    1506     +10
__kmalloc_reserve                             90     100     +10
__get_user_pages                             698     708     +10
__blk_send_generic                           106     116     +10
wait_consider_task                          1860    1868      +8
u64_to_ptr                                     -       8      +8
test_ops                                       -       8      +8
tcp_v4_send_reset                            438     446      +8
tcp_clear_retrans                             24      32      +8
smc_ethtool_ops                              188     196      +8
set_user_nice                                298     306      +8
percpu_ref_switch_waitq                        -       8      +8
parse_args                                   714     722      +8
packet_direct_xmit                           342     350      +8
netlink_setsockopt                           422     430      +8
netlink_set_err                              172     180      +8
netlink_sendmsg                              792     800      +8
netlink_proto_init                           448     456      +8
netlink_create                               478     486      +8
netlink_connect                              222     230      +8
netlink_bind                                 406     414      +8
m_stop                                        96     104      +8
loopback_ethtool_ops                         188     196      +8
lockd_up                                     684     692      +8
ip_cmsg_recv                                 550     558      +8
icmp_global                                    -       8      +8
ext4_listxattr                               526     534      +8
do_scan_async                                348     356      +8
default_ethtool_ops                          188     196      +8
call_status                                  500     508      +8
bsg_map_hdr                                  604     612      +8
bpf_prog_types                                 -       8      +8
bpf_map_types                                  -       8      +8
blk_get_request                              248     256      +8
__netlink_kernel_create                      476     484      +8
__netlink_clear_multicast_users               88      96      +8
__ksymtab_xfrm_policy_hash_rebuild             -       8      +8
__ksymtab_wait_on_page_bit_killable_timeout       -       8      +8
__ksymtab_unregister_restart_handler           -       8      +8
__ksymtab_tcp_send_ack                         -       8      +8
__ksymtab_t10_pi_type3_ip                      -       8      +8
__ksymtab_t10_pi_type3_crc                     -       8      +8
__ksymtab_t10_pi_type1_ip                      -       8      +8
__ksymtab_t10_pi_type1_crc                     -       8      +8
__ksymtab_string_escape_mem                    -       8      +8
__ksymtab_sock_efree                           -       8      +8
__ksymtab_sock_dequeue_err_skb                 -       8      +8
__ksymtab_skb_complete_tx_timestamp            -       8      +8
__ksymtab_skb_clone_sk                         -       8      +8
__ksymtab_single_task_running                  -       8      +8
__ksymtab_simple_nosetlease                    -       8      +8
__ksymtab_register_restart_handler             -       8      +8
__ksymtab_page_waitqueue                       -       8      +8
__ksymtab_out_of_line_wait_on_bit_timeout       -       8      +8
__ksymtab_nfs_put_lock_context                 -       8      +8
__ksymtab_nfs_get_lock_context                 -       8      +8
__ksymtab_netif_wake_subqueue                  -       8      +8
__ksymtab_netif_tx_wake_queue                  -       8      +8
__ksymtab_netif_schedule_queue                 -       8      +8
__ksymtab_locks_copy_conflock                  -       8      +8
__ksymtab_kmem_cache_size                      -       8      +8
__ksymtab_iov_iter_zero                        -       8      +8
__ksymtab_inet_offloads                        -       8      +8
__ksymtab_inet6_offloads                       -       8      +8
__ksymtab_icmp_global_allow                    -       8      +8
__ksymtab_eth_get_headlen                      -       8      +8
__ksymtab_devm_request_resource                -       8      +8
__ksymtab_devm_release_resource                -       8      +8
__ksymtab_copy_to_iter                         -       8      +8
__ksymtab_copy_from_iter                       -       8      +8
__ksymtab_bpf_prog_realloc                     -       8      +8
__ksymtab_bpf_prog_alloc                       -       8      +8
__ksymtab_blk_mq_start_request                 -       8      +8
__ksymtab_blk_mq_end_request                   -       8      +8
__ksymtab_bit_wait_timeout                     -       8      +8
__ksymtab_bit_wait_io_timeout                  -       8      +8
__ksymtab_bioset_create_nobvec                 -       8      +8
__ksymtab_bin2hex                              -       8      +8
__ksymtab_alloc_skb_with_frags                 -       8      +8
__ksymtab___sock_tx_timestamp                  -       8      +8
__ksymtab___skb_gro_checksum_complete          -       8      +8
__ksymtab___skb_flow_get_ports                 -       8      +8
__ksymtab___skb_flow_dissect                   -       8      +8
__ksymtab___kmalloc_track_caller               -       8      +8
__ksymtab___kernel_write                       -       8      +8
__ksymtab___gnet_stats_copy_basic              -       8      +8
__ksymtab___dev_get_by_flags                   -       8      +8
__ksymtab___bpf_prog_free                      -       8      +8
__ksymtab___blk_mq_end_request                 -       8      +8
__ksymtab___alloc_percpu_gfp                   -       8      +8
__kstrtab_bin2hex                              -       8      +8
test_func                                      -       6      +6
sys_brk                                      298     304      +6
param_attr_store                              92      98      +6
netlink_update_subscriptions                 100     106      +6
netlink_update_listeners                     160     166      +6
netlink_seq_next                             156     162      +6
netlink_realloc_groups                       156     162      +6
netlink_lookup                                46      52      +6
netlink_insert                               132     138      +6
netlink_has_listeners                        104     110      +6
netlink_broadcast_filtered                   694     700      +6
netlink_autobind                             186     192      +6
mq_dump_class_stats                           80      86      +6
memdup_user                                   74      80      +6
kstrndup                                      88      94      +6
kstrdup                                       72      78      +6
krealloc                                     122     128      +6
kmemdup                                       54      60      +6
kmem_cache_flags                               -       6      +6
eth_type_trans                               310     316      +6
devres_alloc                                  62      68      +6
devm_kmalloc                                  78      84      +6
crypto_hash_walk_first_compat                 62      68      +6
crypto_hash_walk_first                        66      72      +6
crypto_ahash_walk_first                       72      78      +6
bpf_prepare_filter                           258     264      +6
blk_update_request                           848     854      +6
blk_mq_alloc_request                         194     200      +6
__skb_get_poff                               204     210      +6
__netlink_change_ngroups                     186     192      +6
__krealloc                                    98     104      +6
__kfree_skb                                  120     126      +6
SyS_brk                                      298     304      +6
__func__                                    7306    7311      +5
xs_udp_data_ready                            372     376      +4
xs_local_data_ready                          306     310      +4
vermagic                                      49      53      +4
umount_tree                                  426     430      +4
tcp_v4_send_ack                              292     296      +4
tcp_reno                                      68      72      +4
sysctl_icmp_msgs_per_sec                       -       4      +4
sysctl_icmp_msgs_burst                         -       4      +4
switched_to_dl                                54      58      +4
smc_netdev_ops                               208     212      +4
slab_nomerge                                   -       4      +4
simple_nosetlease                              -       4      +4
scsi_prep_state_check                        188     192      +4
rhashtable_shrink                            122     126      +4
restart_handler_list                           -       4      +4
raw_local_deliver                            400     404      +4
pcpu_populate_chunk                            -       4      +4
pcpu_nr_empty_pop_pages                        -       4      +4
page_writeback_init                           22      26      +4
nfs_swap_activate                             58      62      +4
nfs_pgio_data_destroy                         44      48      +4
nfeth_netdev_ops                             208     212      +4
mutex_optimistic_spin                          -       4      +4
loopback_ops                                 208     212      +4
lookup_user_key_possessed                     16      20      +4
log_size                                       -       4      +4
log_level                                      -       4      +4
log_len                                        -       4      +4
log_buf                                        4       8      +4
locks_remove_file                            366     370      +4
lance_netdev_ops                             208     212      +4
kvasprintf                                    90      94      +4
ip_queue_xmit                                884     888      +4
ip_build_and_send_pkt                        402     406      +4
integrity_attrs                               20      24      +4
insn_state                                     -       4      +4
insn_stack                                     -       4      +4
inotify_free_group_priv                       64      68      +4
init_signals                                 512     516      +4
grace_net_id                                   -       4      +4
gen_replace_estimator                         50      54      +4
fnhe_hashrnd                                   -       4      +4
eip_netdev_ops                               208     212      +4
do_maps_open                                  92      96      +4
do_fcntl                                     968     972      +4
delayed_mntput_list                            -       4      +4
d_materialise_unique                         562     566      +4
cur_stack                                      -       4      +4
cubictcp                                      68      72      +4
byte_count                                     -       4      +4
bdi_init                                     254     258      +4
alloc_netdev_mqs                             764     768      +4
add_page_wait_queue                           44      48      +4
__initcall_register_test_ops7                  -       4      +4
__initcall_percpu_enable_async4                -       4      +4
__initcall_init_grace6                         -       4      +4
__alloc_reserved_percpu                       24      28      +4
__alloc_percpu                                22      26      +4
___once_key                                   40      44      +4
zlib_inflate                                4110    4112      +2
xfrm4_extract_output                         154     156      +2
wait_on_page_bit_killable                    106     108      +2
wait_on_page_bit                             102     104      +2
unlock_page                                   34      36      +2
try_to_free_pages                           1180    1182      +2
tcp_out_of_resources                         182     184      +2
tcp_init_sock                                298     300      +2
task_sched_runtime                            56      58      +2
switched_to_rt                                48      50      +2
switched_to_fair                              52      54      +2
switched_from_fair                            72      74      +2
svc_tcp_recvfrom                            1272    1274      +2
string                                       206     208      +2
skb_zerocopy                                 702     704      +2
rtnl_newlink                                1350    1352      +2
release_one_tty                              112     114      +2
process_vm_rw                               1156    1158      +2
prio_changed_rt                               60      62      +2
prio_changed_fair                             62      64      +2
prio_changed_dl                               44      46      +2
pick_next_task_stop                           72      74      +2
percpu_ref_noop_confirm_switch                 -       2      +2
pcpu_chunk_relocate                          114     116      +2
netpoll_start_xmit                           364     366      +2
n_tty_receive_char_special                  2158    2160      +2
n_tty_receive_char_flagged                   538     540      +2
n_tty_receive_buf_common                    2130    2132      +2
loopback_setup                               118     120      +2
lease_modify                                 154     156      +2
lease_break_callback                          30      32      +2
kmem_cache_free                              198     200      +2
ip_copy_metadata                             192     194      +2
inet_sk_rx_dst_set                            36      38      +2
ide_park_store                               578     580      +2
icmp_push_reply                              226     228      +2
fib_create_info                             2450    2452      +2
end_page_writeback                           100     102      +2
elv_merge_requests                            92      94      +2
do_task_delta_exec                            74      76      +2
dl_task_timer                                132     134      +2
dev_init_scheduler_queue                      18      20      +2
clear_bdi_congested                           88      90      +2
cfb_copyarea                                2012    2014      +2
bt_clear_tag                                 278     280      +2
blk_release_queue                            196     198      +2
blk_queue_bio                                626     628      +2
blk_mq_freeze_queue                          270     272      +2
alloc_arraycache                              54      56      +2
__warned                                     318     320      +2
__schedule                                   890     892      +2
__lock_page_killable                          84      86      +2
__lock_page                                   84      86      +2
__ip_make_skb                                954     956      +2
__alloc_pages_nodemask                      1512    1514      +2
pcpu_atomic_alloc_failed                       -       1      +1
pcpu_async_enabled                             -       1      +1
___done                                       10      11      +1
xfrm4_udp_encap_rcv                          380     378      -2
tty_write_message                            118     116      -2
tcp_try_fastopen                             584     582      -2
tcp_snd_wnd_test                              58      56      -2
tcp_setsockopt                              1980    1978      -2
tcp_set_skb_tso_segs                         142     140      -2
tcp_fastopen_create_child                    610     608      -2
task_tick_rt                                 246     244      -2
skb_prepare_for_shift                         72      70      -2
show_pools                                   240     238      -2
scsi_eh_ready_devs                          1536    1534      -2
schedule_hrtimeout_range_clock               230     228      -2
release_task_mempolicy                         2       -      -2
pty_unix98_ioctl                             292     290      -2
pty_flush_buffer                              64      62      -2
pskb_expand_head                             506     504      -2
pfkey_promisc                                188     186      -2
path_mountpoint                              868     866      -2
netlink_skb_destructor                       120     118      -2
n_tty_poll                                   338     336      -2
n_tty_packet_mode_flush                       66      64      -2
mem_open                                      34      32      -2
kmem_cache_alloc                             180     178      -2
iptunnel_handle_offloads                     218     216      -2
ip_rcv_finish                                838     836      -2
ip_ra_control                                326     324      -2
find_next_zero_bit_le                        232     230      -2
environ_open                                  22      20      -2
do_drain                                      76      74      -2
copy_process                                3796    3794      -2
cont_write_begin                             856     854      -2
cache_flusharray                             212     210      -2
blk_mq_init_flush                              2       -      -2
blk_init_allocated_queue                     202     200      -2
__skb_splice_bits                            274     272      -2
__skb_gso_segment                            190     188      -2
__pskb_pull_tail                             684     682      -2
__print_once                                  29      27      -2
__kmalloc                                    204     202      -2
__f_setown                                    28      26      -2
__disable_irq_nosync                          90      88      -2
___pskb_trim                                 490     488      -2
zero_bdi                                     248     244      -4
v6_seq                                         4       -      -4
v4_seq                                         4       -      -4
udpv6_offload                                 20      16      -4
udpv4_offload                                 20      16      -4
tty_set_termios                              710     706      -4
tcpv6_offload                                 20      16      -4
tcpv4_offload                                 20      16      -4
tcp_send_dupack                              116     112      -4
sysctl_ip_nonlocal_bind                        4       -      -4
swap_backing_dev_info                        248     244      -4
skb_vlan_untag                               480     476      -4
skb_make_writable                            164     160      -4
skb_cow_data                                 624     620      -4
skb_checksum_help                            390     386      -4
sk_filter_uncharge                            68      64      -4
sit_offload                                   20      16      -4
shmem_backing_dev_info                       248     244      -4
sched_rt_handler                             638     634      -4
scan_unevictable_pages                         4       -      -4
rthdr_offload                                 20      16      -4
ramfs_backing_dev_info                       248     244      -4
noop_backing_dev_info                        248     244      -4
nfs_setlease                                   4       -      -4
nfs_file_write                               304     300      -4
nfs_file_read                                138     134      -4
mmc_ioctl_cdrom_read_audio                   774     770      -4
locks_start_grace                            108     104      -4
locks_in_grace                                94      90      -4
kmem_cache_boot                              122     118      -4
kfree                                        200     196      -4
key_create_or_update                         800     796      -4
kernfs_bdi                                   248     244      -4
ipv6_packet_offload                           26      22      -4
ipip_offload                                  20      16      -4
ip_packet_offload                             26      22      -4
gre_offload                                   20      16      -4
get_slabinfo                                 534     530      -4
fsnotify_add_vfsmount_mark                   312     308      -4
free_block                                   356     352      -4
enable_irq                                   128     124      -4
dstopt_offload                                20      16      -4
directly_mappable_cdev_bdi                   248     244      -4
default_backing_dev_info                     248     244      -4
count                                        128     124      -4
cache_reap                                   280     276      -4
blk_integrity_register                       264     260      -4
blk_get_backing_dev_info                      24      20      -4
__setup_irq                                  950     946      -4
__do_softirq                                 452     448      -4
__ac_get_obj                                 402     398      -4
tty_write                                    424     418      -6
tcp_validate_incoming                        638     632      -6
tcp_v4_do_rcv                                636     630      -6
tcp_timewait_state_process                   776     770      -6
tcp_skb_mark_lost_uncond_verify               60      54      -6
tcp_simple_retransmit                        320     314      -6
tcp_adjust_pcount                            244     238      -6
scsi_execute                                 436     430      -6
scsi_dispatch_cmd                            192     186      -6
run_ksoftirqd                                102      96      -6
nlmsvc_free_block                            124     118      -6
lookup_dcache                                142     136      -6
inet_proto_csum_replace4                     196     190      -6
inet_peer_base_init                           26      20      -6
bpf_convert_filter                          3478    3472      -6
blk_mq_requeue_request                        66      60      -6
blk_mq_make_request                          456     450      -6
__prandom_timer                              122     116      -6
__dentry_kill                                378     372      -6
__blk_mq_run_hw_queue                        730     724      -6
tcp_xmit_retransmit_queue                    582     574      -8
tcp_recv_skb                                 126     118      -8
tcp_rcv_established                         1326    1318      -8
tcp_init_tso_segs                             72      64      -8
tcp_event_new_data_sent                      120     112      -8
tcp_check_req                               1074    1066      -8
smc_drv_probe                               2230    2222      -8
skb_pad                                      256     248      -8
send_break                                   188     180      -8
proc_ipcauto_dointvec_minmax                 188     180      -8
normalize_rt_tasks                           444     436      -8
nlmsvc_grant_deferred                        320     312      -8
kernfs_dop_revalidate                        178     170      -8
ipv6_gro_receive                             716     708      -8
handle_edge_irq                              222     214      -8
gre_gso_segment                              700     692      -8
free_fib_info_rcu                            294     286      -8
__smp_mb__before_atomic                        8       -      -8
__smp_mb__after_atomic                         8       -      -8
__skb_tx_hash                                172     164      -8
__ksymtab_user_match                           8       -      -8
__ksymtab_tcp_init_congestion_ops              8       -      -8
__ksymtab_sysctl_ip_nonlocal_bind              8       -      -8
__ksymtab_sock_tx_timestamp                    8       -      -8
__ksymtab_skb_gro_receive                      8       -      -8
__ksymtab_skb_flow_get_ports                   8       -      -8
__ksymtab_skb_flow_dissect                     8       -      -8
__ksymtab_nfs_setlease                         8       -      -8
__ksymtab_lock_may_write                       8       -      -8
__ksymtab_lock_may_read                        8       -      -8
__ksymtab_dev_hard_start_xmit                  8       -      -8
__ksymtab_dev_get_by_flags_rcu                 8       -      -8
__ksymtab_check_submounts_and_drop             8       -      -8
__ksymtab_blk_mq_end_io                        8       -      -8
__ksymtab_bio_integrity_tag_size               8       -      -8
__ksymtab_bio_integrity_set_tag                8       -      -8
__ksymtab_bio_integrity_get_tag                8       -      -8
__ksymtab_balloon_page_enqueue                 8       -      -8
__ksymtab_balloon_page_dequeue                 8       -      -8
__ksymtab_balloon_devinfo_alloc                8       -      -8
__ksymtab___smp_mb__before_atomic              8       -      -8
__ksymtab___smp_mb__after_atomic               8       -      -8
__ksymtab___locks_copy_lock                    8       -      -8
__ksymtab___blk_mq_end_io                      8       -      -8
__kmem_cache_create                          606     598      -8
try_to_unuse                                1134    1124     -10
tcp_try_rmem_schedule                        826     816     -10
tcp_match_skb_to_sack                        232     222     -10
tcp_established_options                      150     140     -10
tcp_close                                   1156    1146     -10
svc_prepare_thread                           442     432     -10
setup_per_cpu_areas                          160     150     -10
mq_dump                                      208     198     -10
handle_simple_irq                             82      72     -10
blk_insert_flush                             546     536     -10
bdi_destroy                                  228     218     -10
__kstrtab_user_match                          11       -     -11
xs_udp_setup_socket                          210     198     -12
xs_local_setup_socket                        360     348     -12
tcp_trim_head                                190     178     -12
tcp_send_loss_probe                          500     488     -12
tcp_sacktag_write_queue                     1926    1914     -12
tcp_read_sock                                364     352     -12
tcp_init_cwnd_reduction                       74      62     -12
skb_release_data                             190     178     -12
rpc_async_schedule                            28      16     -12
rcu_irq_enter                                100      88     -12
rcu_check_callbacks                           84      72     -12
hash_resize_mutex                             24      12     -12
handle_level_irq                             172     160     -12
dev_ethtool                                 3442    3430     -12
check_and_drop                                44      32     -12
blkdev_ioctl                                2964    2952     -12
__kstrtab_nfs_setlease                        13       -     -13
vma_stop                                      48      34     -14
tcp_try_coalesce                             154     140     -14
set_up_node                                  354     340     -14
sd_dif_type3_generate_ip                      14       -     -14
sd_dif_type3_generate_crc                     14       -     -14
sd_dif_type1_generate_ip                      14       -     -14
sd_dif_type1_generate_crc                     14       -     -14
pid_revalidate                               164     150     -14
pcpu_count_ptr                                14       -     -14
nfs_lock_and_join_requests                   948     934     -14
integrity_write_entry                         14       -     -14
integrity_read_entry                          14       -     -14
free_percpu                                  212     198     -14
do_shmat                                     852     838     -14
bdi_unregister                               322     308     -14
__kstrtab_lock_may_read                       14       -     -14
__kstrtab_blk_mq_end_io                       14       -     -14
__kmem_cache_shrink                          202     188     -14
__kstrtab_lock_may_write                      15       -     -15
tcp_may_send_now                             244     228     -16
skb_clone                                    168     152     -16
setup_node_pointer                            16       -     -16
sd_dif_type3_verify_ip                        16       -     -16
sd_dif_type3_verify_crc                       16       -     -16
sd_dif_type1_verify_ip                        16       -     -16
sd_dif_type1_verify_crc                       16       -     -16
pcpu_reclaim_work                             16       -     -16
nobh_truncate_page                           676     660     -16
lookup_fast                                  664     648     -16
fanotify_read                                788     772     -16
bpf_prog_create                              142     126     -16
blk_mq_start_request                         116     100     -16
bio_integrity_init                           102      86     -16
bdi_lock_two                                  16       -     -16
__kstrtab_skb_gro_receive                     16       -     -16
__kstrtab___blk_mq_end_io                     16       -     -16
__kmem_cache_shutdown                        108      92     -16
__kstrtab_skb_flow_dissect                    17       -     -17
xfrm_output                                  168     150     -18
tcp_retransmit_skb                           194     176     -18
tcp_rcv_state_process                       2786    2768     -18
svc_wake_up                                  154     136     -18
sk_attach_filter                             374     356     -18
lance_tx_timeout                             378     360     -18
lance_interrupt                             1266    1248     -18
__kstrtab_sock_tx_timestamp                   18       -     -18
__kstrtab___locks_copy_lock                   18       -     -18
__kstrtab_skb_flow_get_ports                  19       -     -19
tcp_slow_start                                60      40     -20
tcp_send_active_reset                        234     214     -20
smc_timeout                                  246     226     -20
proc_flush_task                              332     312     -20
nfeth_tx_timeout                              42      22     -20
key_get_instantiation_authkey                164     144     -20
initarray_generic                             20       -     -20
dev_load                                     144     124     -20
copy_mount_string                             46      26     -20
__kstrtab_dev_hard_start_xmit                 20       -     -20
__kstrtab_dev_get_by_flags_rcu                21       -     -21
__kstrtab_balloon_page_enqueue                21       -     -21
__kstrtab_balloon_page_dequeue                21       -     -21
rcu_idle_exit                                160     138     -22
nfs_lookup_revalidate                        858     836     -22
locks_copy_lock                              130     108     -22
ic_bootp_recv                               1406    1384     -22
gre_gso_send_check                            22       -     -22
do_ccupdate_local                             22       -     -22
blk_rq_timed_out                             112      90     -22
blk_mq_rq_timer                              226     204     -22
attempt_merge                               1650    1628     -22
__kstrtab_bio_integrity_set_tag               22       -     -22
__kstrtab_bio_integrity_get_tag               22       -     -22
__kstrtab_balloon_devinfo_alloc               22       -     -22
__kstrtab_bio_integrity_tag_size              23       -     -23
__kstrtab___smp_mb__after_atomic              23       -     -23
tcp_mark_head_lost                           410     386     -24
tcp_clear_retrans_partial                     24       -     -24
skb_gro_receive                             1060    1036     -24
sd_dif_crc_fn                                 24       -     -24
lookup_user_key                             1106    1082     -24
eip_tx_timeout                               230     206     -24
cache_alloc_refill                          1302    1278     -24
__kstrtab_tcp_init_congestion_ops             24       -     -24
__kstrtab_sysctl_ip_nonlocal_bind             24       -     -24
__kstrtab___smp_mb__before_atomic             24       -     -24
__d_move                                    1090    1066     -24
__blkdev_get                                1072    1048     -24
__kstrtab_check_submounts_and_drop            25       -     -25
udp4_lib_lookup2                             656     630     -26
tid_fd_revalidate                            318     292     -26
show_map                                      70      44     -26
prandom_bytes_state                          110      84     -26
cookie_v4_check                             1138    1112     -26
__key_link_check_live_key                    140     114     -26
__inet_lookup_listener                       588     562     -26
udp_gro_receive                              358     330     -28
tcp_send_synack                              378     350     -28
posix_cpu_clock_get_task                     272     244     -28
netif_device_attach                          116      88     -28
handle_fasteoi_irq                           244     216     -28
dev_watchdog                                 456     428     -28
blk_alloc_queue_node                         426     398     -28
__disable_irq                                 64      36     -28
tcp_write_wakeup                             312     282     -30
sys_mount                                    182     152     -30
smc_hardware_send_pkt                        798     768     -30
show_smap                                    686     656     -30
ei_tx_intr                                   560     530     -30
__d_free                                      84      54     -30
SyS_mount                                    182     152     -30
tcp_shifted_skb                              660     628     -32
skb_segment                                 1954    1922     -32
lock_mount                                   398     366     -32
dev_deactivate_many                          510     478     -32
vm_table                                     952     918     -34
tcp_send_fin                                 320     286     -34
mprotect_fixup                               428     394     -34
integrity_write_show                          34       -     -34
integrity_read_show                           34       -     -34
init_list                                    270     236     -34
do_sys_times                                 128      94     -34
blk_mq_tag_busy_iter                         114      80     -34
blk_flush_complete_seq                       708     674     -34
vmalloc_open                                  56      20     -36
user_match                                    36       -     -36
search_nested_keyrings                       742     706     -36
irq_check_poll                                36       -     -36
dev_gso_skb_destructor                        36       -     -36
__sched_fork                                 160     124     -36
nfs_direct_IO                                102      64     -38
m_next                                       190     152     -38
fsnotify_final_destroy_group                  38       -     -38
fcntl_dirnotify                              600     562     -38
__enable_irq                                 144     106     -38
sd_dif_type1_get_tag                          40       -     -40
ip_defrag                                   2902    2862     -40
free_pages_and_swap_cache                    154     114     -40
check_and_collect                             40       -     -40
tcp_send_rcvq                                296     254     -42
tcp_connect_queue_skb                        154     112     -42
sysvipc_proc_open                            108      66     -42
nfs_generic_pgio                             638     596     -42
mntput_no_expire                             272     230     -42
nfs_readdir_xdr_to_array                    1418    1374     -44
kallsyms_open                                 94      50     -44
tcp_sacktag_walk                            1092    1044     -48
sd_dif_ip_fn                                  48       -     -48
__tcp_retransmit_skb                        1172    1124     -48
inet6_lookup_listener                        730     678     -52
cpuup_canceled                               398     346     -52
__udp4_lib_lookup                            890     838     -52
tcp_enter_loss                               600     546     -54
svc_tcp_fragment_received                     68      14     -54
icmp_send                                   1580    1526     -54
cpuup_callback                               626     572     -54
vprintk_emit                                1018     960     -58
tcp_write_xmit                              2564    2506     -58
bio_integrity_get_tag                         58       -     -58
balloon_devinfo_alloc                         58       -     -58
__vfs_setlease                                58       -     -58
ext4_convert_inline_data_nolock             1106    1046     -60
d_prune_aliases                              144      84     -60
bio_integrity_set_tag                         60       -     -60
dif_type3_integrity_ip                        62       -     -62
dif_type3_integrity_crc                       62       -     -62
dif_type1_integrity_ip                        62       -     -62
dif_type1_integrity_crc                       62       -     -62
svc_xprt_do_enqueue                          386     322     -64
integrity_write_store                         64       -     -64
integrity_read_store                          64       -     -64
gre_gro_receive                              802     738     -64
blk_rq_merge_ok                              524     460     -64
sock_recv_errqueue                           398     332     -66
nlmsvc_defer_lock_rqst                        66       -     -66
ethtool_get_settings                          66       -     -66
tcp_init_congestion_ops                       68       -     -68
tcp_init_congestion_control                   94      26     -68
async_synchronize_cookie_domain              278     210     -68
tcp_send_probe0                              302     232     -70
__find_get_block                             516     446     -70
xs_create_sock                               434     362     -72
sock_tx_timestamp                             72       -     -72
bio_integrity_tag_size                        72       -     -72
skb_try_coalesce                             926     852     -74
scan_unevictable_handler                      74       -     -74
ip_recv_error                                570     496     -74
tcp_gso_segment                             1152    1076     -76
tcp_fragment                                 748     672     -76
sd_dif_type3_generate                         76       -     -76
blk_mq_clone_flush_request                    76       -     -76
aio_read_zero                                 76       -     -76
__blk_mq_end_io                               76       -     -76
__locks_copy_lock                             78       -     -78
inet_getpeer                                1238    1158     -80
__mem_open                                   118      38     -80
fcntl_setlease                               252     170     -82
dev_get_by_flags_rcu                          82       -     -82
sys_listxattr                                112      26     -86
__percpu_ref_kill_expedited                   86       -     -86
SyS_listxattr                                112      26     -86
tcp_event_data_recv                          570     482     -88
sys_llistxattr                               112      24     -88
sys_getxattr                                 118      30     -88
SyS_llistxattr                               112      24     -88
SyS_getxattr                                 118      30     -88
sys_lgetxattr                                118      28     -90
sd_dif_type3_set_tag                          90       -     -90
sd_dif_type1_generate                         90       -     -90
rcu_idle_exit_common                          92       2     -90
SyS_lgetxattr                                118      28     -90
lease_init                                    92       -     -92
async_run_entry_fn                           268     176     -92
blk_rq_check_expired                          98       -     -98
bdev_inode_switch_bdi                        184      84    -100
svc_recv                                    1750    1646    -104
sd_dif_type1_set_tag                         104       -    -104
rcu_idle_enter_common                        156      52    -104
strnicmp                                     130      24    -106
icmp_reply                                   550     442    -108
tcp_v4_gso_send_check                        112       -    -112
stop_tty                                     138      26    -112
sd_dif_type3_get_tag                         112       -    -112
vm_is_stack                                  114       -    -114
sys_removexattr                              136      22    -114
lock_may_write                               114       -    -114
SyS_removexattr                              136      22    -114
sys_lremovexattr                             136      20    -116
read_zero                                    116       -    -116
SyS_lremovexattr                             136      20    -116
sys_setxattr                                 154      34    -120
SyS_setxattr                                 154      34    -120
sys_lsetxattr                                154      32    -122
SyS_lsetxattr                                154      32    -122
lock_may_read                                124       -    -124
blk_mq_end_io                                126       -    -126
balloon_page_enqueue                         128       -    -128
start_tty                                    158      26    -132
tcp_data_queue                              2852    2714    -138
move_freepages                               284     146    -138
mmap_region                                 1302    1164    -138
__skb_tstamp_tx                              240     102    -138
sd_dif_type3_verify                          140       -    -140
percpu_ref_kill_rcu                          140       -    -140
skb_mac_gso_segment                          354     212    -142
dump_page_flags                              144       -    -144
udp4_ufo_send_check                          156       -    -156
skb_flow_get_ports                           158       -    -158
tcp_collapse                                 834     674    -160
do_tune_cpucache                            1052     892    -160
nlmsvc_testlock                              378     206    -172
bit_clear_margins                            346     172    -174
send_prio_char                               184       -    -184
ipv6_gso_send_check                          186       -    -186
tcp_sendpage                                1322    1134    -188
tcp_sendmsg                                 2482    2292    -190
copy_page_to_iter                            752     558    -194
udp6_ufo_send_check                          198       -    -198
copy_page_from_iter                          788     590    -198
check_submounts_and_drop                     200       -    -200
balloon_page_dequeue                         200       -    -200
kmem_cache_init                              790     588    -202
bio_integrity_tag                            202       -    -202
__copy_skb_header                            440     228    -212
dns_resolver_match                           218       -    -218
sd_dif_type1_verify                          224       -    -224
_submit_bh                                   676     424    -252
blk_mq_timeout_check                         268       -    -268
inet_gso_send_check                          274       -    -274
bt_for_each_free                             282       -    -282
bio_integrity_generate_verify                298       -    -298
pcpu_reclaim                                 304       -    -304
tcp_v6_gso_send_check                        318       -    -318
devkmsg_writev                               332       -    -332
nlmsvc_create_block                          336       -    -336
init_kmem_cache_node                        1248     832    -416
sock_alloc_send_pskb                         914     476    -438
dev_hard_start_xmit                         1084     632    -452
dev_queue_xmit_nit                           476       -    -476
bioset_create                                548      24    -524
ip_options_echo                              800       -    -800
create_elf_tables                            844       -    -844
skb_flow_dissect                             922       -    -922

Gr{oetje,eeting}s,

						Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
							    -- Linus Torvalds

^ permalink raw reply

* Re: [bisected] e341694e3eb5 netlink_lookup() rcu conversion causes latencies
From: Heiko Carstens @ 2014-10-20  8:21 UTC (permalink / raw)
  To: Thomas Graf
  Cc: Eric Dumazet, Sasha Levin, paulmck, Nikolay Aleksandrov,
	David S. Miller, netdev, linux-kernel, Ursula Braun
In-Reply-To: <20141011222514.GA14186@casper.infradead.org>

On Sat, Oct 11, 2014 at 11:25:14PM +0100, Thomas Graf wrote:
> On 10/11/14 at 12:32pm, Eric Dumazet wrote:
> > On Sat, 2014-10-11 at 10:36 +0200, Heiko Carstens wrote:
> > > Hi all,
> > > 
> > > it just came to my attention that commit e341694e3eb5
> > > "netlink: Convert netlink_lookup() to use RCU protected hash table"
> > > causes network latencies for me on s390.
> > > 
> > > The testcase is quite simple and 100% reproducible on s390:
> > > 
> > > Simply login via ssh to a remote system which has the above mentioned
> > > patch applied. Any action like pressing return now has significant
> > > latencies. Or in other words, working via such a connection becomes
> > > a pain ;)
> > > 
> > > I haven't debugged it, however I assume the problem is that a) the
> > > commit introduces a synchronize_net() call and b) s390 kernels
> > > usually get compiled with CONFIG_HZ_100 while most other architectures
> > > use CONFIG_HZ_1000.
> > > If I change the kernel config to CONFIG_HZ_1000 the problem goes away,
> > > however I don't consider this a fix...
> > > 
> > > Another reason why this hasn't been observed on x86 may or may not be
> > > that we haven't implemented CONFIG_HAVE_CONTEXT_TRACKING on s390 (yet).
> > > But that's just guessing...
> > 
> > CC Paul and Sasha
> 
> I think the issue here is obvious and a fix is on the way to move
> the insertion and removal to a worker to no longer require the
> synchronize_rcu().
> 
> What bothers me is that the synchronize_rcu() should only occur
> on expand/shrink and not for every table update. The default table
> size is 64.

*ping* ... is there already any patch available?

^ permalink raw reply

* [PATCH 7/7] netfilter: nft_nat: dump attributes if they are set
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

Dump NFTA_NAT_REG_ADDR_MIN if this is non-zero. Same thing with
NFTA_NAT_REG_PROTO_MIN.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c |   20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index a95e0c1..afe2b0b 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -191,17 +191,19 @@ static int nft_nat_dump(struct sk_buff *skb, const struct nft_expr *expr)
 
 	if (nla_put_be32(skb, NFTA_NAT_FAMILY, htonl(priv->family)))
 		goto nla_put_failure;
-	if (nla_put_be32(skb,
-			 NFTA_NAT_REG_ADDR_MIN, htonl(priv->sreg_addr_min)))
-		goto nla_put_failure;
-	if (nla_put_be32(skb,
-			 NFTA_NAT_REG_ADDR_MAX, htonl(priv->sreg_addr_max)))
-		goto nla_put_failure;
+
+	if (priv->sreg_addr_min) {
+		if (nla_put_be32(skb, NFTA_NAT_REG_ADDR_MIN,
+				 htonl(priv->sreg_addr_min)) ||
+		    nla_put_be32(skb, NFTA_NAT_REG_ADDR_MAX,
+				 htonl(priv->sreg_addr_max)))
+			goto nla_put_failure;
+	}
+
 	if (priv->sreg_proto_min) {
 		if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MIN,
-				 htonl(priv->sreg_proto_min)))
-			goto nla_put_failure;
-		if (nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
+				 htonl(priv->sreg_proto_min)) ||
+		    nla_put_be32(skb, NFTA_NAT_REG_PROTO_MAX,
 				 htonl(priv->sreg_proto_max)))
 			goto nla_put_failure;
 	}
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 6/7] netfilter: nft_nat: NFTA_NAT_REG_ADDR_MAX depends on NFTA_NAT_REG_ADDR_MIN
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

Interpret NFTA_NAT_REG_ADDR_MAX if NFTA_NAT_REG_ADDR_MIN is present,
otherwise, skip it. Same thing with NFTA_NAT_REG_PROTO_MAX.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c |   50 ++++++++++++++++++++++++++---------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5078f1f..a95e0c1 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -126,38 +126,44 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	priv->family = family;
 
 	if (tb[NFTA_NAT_REG_ADDR_MIN]) {
-		priv->sreg_addr_min = ntohl(nla_get_be32(
-						tb[NFTA_NAT_REG_ADDR_MIN]));
+		priv->sreg_addr_min =
+			ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MIN]));
+
 		err = nft_validate_input_register(priv->sreg_addr_min);
 		if (err < 0)
 			return err;
-	}
 
-	if (tb[NFTA_NAT_REG_ADDR_MAX]) {
-		priv->sreg_addr_max = ntohl(nla_get_be32(
-						tb[NFTA_NAT_REG_ADDR_MAX]));
-		err = nft_validate_input_register(priv->sreg_addr_max);
-		if (err < 0)
-			return err;
-	} else
-		priv->sreg_addr_max = priv->sreg_addr_min;
+		if (tb[NFTA_NAT_REG_ADDR_MAX]) {
+			priv->sreg_addr_max =
+				ntohl(nla_get_be32(tb[NFTA_NAT_REG_ADDR_MAX]));
+
+			err = nft_validate_input_register(priv->sreg_addr_max);
+			if (err < 0)
+				return err;
+		} else {
+			priv->sreg_addr_max = priv->sreg_addr_min;
+		}
+	}
 
 	if (tb[NFTA_NAT_REG_PROTO_MIN]) {
-		priv->sreg_proto_min = ntohl(nla_get_be32(
-						tb[NFTA_NAT_REG_PROTO_MIN]));
+		priv->sreg_proto_min =
+			ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MIN]));
+
 		err = nft_validate_input_register(priv->sreg_proto_min);
 		if (err < 0)
 			return err;
-	}
 
-	if (tb[NFTA_NAT_REG_PROTO_MAX]) {
-		priv->sreg_proto_max = ntohl(nla_get_be32(
-						tb[NFTA_NAT_REG_PROTO_MAX]));
-		err = nft_validate_input_register(priv->sreg_proto_max);
-		if (err < 0)
-			return err;
-	} else
-		priv->sreg_proto_max = priv->sreg_proto_min;
+		if (tb[NFTA_NAT_REG_PROTO_MAX]) {
+			priv->sreg_proto_max =
+				ntohl(nla_get_be32(tb[NFTA_NAT_REG_PROTO_MAX]));
+
+			err = nft_validate_input_register(priv->sreg_proto_max);
+			if (err < 0)
+				return err;
+		} else {
+			priv->sreg_proto_max = priv->sreg_proto_min;
+		}
+	}
 
 	if (tb[NFTA_NAT_FLAGS]) {
 		priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 5/7] netfilter: nft_nat: insufficient attribute validation
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

We have to validate that we at least get an NFTA_NAT_REG_ADDR_MIN or
NFTA_NAT_REG_PROTO_MIN attribute. Reject the configuration if neither
of them is present.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c |    4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 0f0af6e..5078f1f 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -99,7 +99,9 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (err < 0)
 		return err;
 
-	if (tb[NFTA_NAT_TYPE] == NULL)
+	if (tb[NFTA_NAT_TYPE] == NULL ||
+	    (tb[NFTA_NAT_REG_ADDR_MIN] == NULL &&
+	     tb[NFTA_NAT_REG_PROTO_MIN] == NULL))
 		return -EINVAL;
 
 	switch (ntohl(nla_get_be32(tb[NFTA_NAT_TYPE]))) {
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 2/7] netfilter: nf_tables: restrict nat/masq expressions to nat chain type
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

This adds the missing validation code to avoid the use of nat/masq from
non-nat chains. The validation assumes two possible configuration
scenarios:

1) Use of nat from base chain that is not of nat type. Reject this
   configuration from the nft_*_init() path of the expression.

2) Use of nat from non-base chain. In this case, we have to wait until
   the non-base chain is referenced by at least one base chain via
   jump/goto. This is resolved from the nft_*_validate() path which is
   called from nf_tables_check_loops().

The user gets an -EOPNOTSUPP in both cases.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h  |    3 +++
 include/net/netfilter/nft_masq.h   |    3 +++
 net/ipv4/netfilter/nft_masq_ipv4.c |    1 +
 net/ipv6/netfilter/nft_masq_ipv6.c |    1 +
 net/netfilter/nf_tables_api.c      |   14 ++++++++++++++
 net/netfilter/nft_masq.c           |   12 ++++++++++++
 net/netfilter/nft_nat.c            |   12 ++++++++++++
 7 files changed, 46 insertions(+)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d72923..845c596 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -530,6 +530,9 @@ enum nft_chain_type {
 	NFT_CHAIN_T_MAX
 };
 
+int nft_chain_validate_dependency(const struct nft_chain *chain,
+				  enum nft_chain_type type);
+
 struct nft_stats {
 	u64			bytes;
 	u64			pkts;
diff --git a/include/net/netfilter/nft_masq.h b/include/net/netfilter/nft_masq.h
index c72729f..e2a518b 100644
--- a/include/net/netfilter/nft_masq.h
+++ b/include/net/netfilter/nft_masq.h
@@ -13,4 +13,7 @@ int nft_masq_init(const struct nft_ctx *ctx,
 
 int nft_masq_dump(struct sk_buff *skb, const struct nft_expr *expr);
 
+int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		      const struct nft_data **data);
+
 #endif /* _NFT_MASQ_H_ */
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index 1c636d6..c1023c4 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = {
 	.eval		= nft_masq_ipv4_eval,
 	.init		= nft_masq_init,
 	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
 };
 
 static struct nft_expr_type nft_masq_ipv4_type __read_mostly = {
diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c
index 556262f..8a7ac68 100644
--- a/net/ipv6/netfilter/nft_masq_ipv6.c
+++ b/net/ipv6/netfilter/nft_masq_ipv6.c
@@ -39,6 +39,7 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = {
 	.eval		= nft_masq_ipv6_eval,
 	.init		= nft_masq_init,
 	.dump		= nft_masq_dump,
+	.validate	= nft_masq_validate,
 };
 
 static struct nft_expr_type nft_masq_ipv6_type __read_mostly = {
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 556a0df..65eb2a1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3744,6 +3744,20 @@ static const struct nfnetlink_subsystem nf_tables_subsys = {
 	.abort		= nf_tables_abort,
 };
 
+int nft_chain_validate_dependency(const struct nft_chain *chain,
+				  enum nft_chain_type type)
+{
+	const struct nft_base_chain *basechain;
+
+	if (chain->flags & NFT_BASE_CHAIN) {
+		basechain = nft_base_chain(chain);
+		if (basechain->type->type != type)
+			return -EOPNOTSUPP;
+	}
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nft_chain_validate_dependency);
+
 /*
  * Loop detection - walk through the ruleset beginning at the destination chain
  * of a new jump until either the source chain is reached (loop) or all
diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c
index 6637bab..d1ffd5e 100644
--- a/net/netfilter/nft_masq.c
+++ b/net/netfilter/nft_masq.c
@@ -26,6 +26,11 @@ int nft_masq_init(const struct nft_ctx *ctx,
 		  const struct nlattr * const tb[])
 {
 	struct nft_masq *priv = nft_expr_priv(expr);
+	int err;
+
+	err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+	if (err < 0)
+		return err;
 
 	if (tb[NFTA_MASQ_FLAGS] == NULL)
 		return 0;
@@ -55,5 +60,12 @@ nla_put_failure:
 }
 EXPORT_SYMBOL_GPL(nft_masq_dump);
 
+int nft_masq_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
+		      const struct nft_data **data)
+{
+	return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+}
+EXPORT_SYMBOL_GPL(nft_masq_validate);
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>");
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 799550b..0f0af6e 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -95,6 +95,10 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	u32 family;
 	int err;
 
+	err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+	if (err < 0)
+		return err;
+
 	if (tb[NFTA_NAT_TYPE] == NULL)
 		return -EINVAL;
 
@@ -205,6 +209,13 @@ nla_put_failure:
 	return -1;
 }
 
+static int nft_nat_validate(const struct nft_ctx *ctx,
+			    const struct nft_expr *expr,
+			    const struct nft_data **data)
+{
+	return nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
+}
+
 static struct nft_expr_type nft_nat_type;
 static const struct nft_expr_ops nft_nat_ops = {
 	.type           = &nft_nat_type,
@@ -212,6 +223,7 @@ static const struct nft_expr_ops nft_nat_ops = {
 	.eval           = nft_nat_eval,
 	.init           = nft_nat_init,
 	.dump           = nft_nat_dump,
+	.validate	= nft_nat_validate,
 };
 
 static struct nft_expr_type nft_nat_type __read_mostly = {
-- 
1.7.10.4


^ permalink raw reply related

* [PATCH 4/7] netfilter: nft_compat: validate chain type in match/target
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

We have to validate the real chain type to ensure that matches/targets
are not used outside of their scope (e.g. MASQUERADE in nat chain type).
The existing validation relies on the table name, but this is not
sufficient since userspace can fool us by using the appropriate table
name with a different chain type.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c |   75 ++++++++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 44ae273..0480f57 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -19,9 +19,52 @@
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
 #include <linux/netfilter_ipv6/ip6_tables.h>
-#include <asm/uaccess.h> /* for set_fs */
 #include <net/netfilter/nf_tables.h>
 
+static const struct {
+       const char	*name;
+       u8		type;
+} table_to_chaintype[] = {
+       { "filter",     NFT_CHAIN_T_DEFAULT },
+       { "raw",        NFT_CHAIN_T_DEFAULT },
+       { "security",   NFT_CHAIN_T_DEFAULT },
+       { "mangle",     NFT_CHAIN_T_ROUTE },
+       { "nat",        NFT_CHAIN_T_NAT },
+       { },
+};
+
+static int nft_compat_table_to_chaintype(const char *table)
+{
+	int i;
+
+	for (i = 0; table_to_chaintype[i].name != NULL; i++) {
+		if (strcmp(table_to_chaintype[i].name, table) == 0)
+			return table_to_chaintype[i].type;
+	}
+
+	return -1;
+}
+
+static int nft_compat_chain_validate_dependency(const char *tablename,
+						const struct nft_chain *chain)
+{
+	enum nft_chain_type type;
+	const struct nft_base_chain *basechain;
+
+	if (!tablename || !(chain->flags & NFT_BASE_CHAIN))
+		return 0;
+
+	type = nft_compat_table_to_chaintype(tablename);
+	if (type < 0)
+		return -EINVAL;
+
+	basechain = nft_base_chain(chain);
+	if (basechain->type->type != type)
+		return -EINVAL;
+
+	return 0;
+}
+
 union nft_entry {
 	struct ipt_entry e4;
 	struct ip6t_entry e6;
@@ -153,6 +196,10 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	union nft_entry e = {};
 	int ret;
 
+	ret = nft_compat_chain_validate_dependency(target->table, ctx->chain);
+	if (ret < 0)
+		goto err;
+
 	target_compat_from_user(target, nla_data(tb[NFTA_TARGET_INFO]), info);
 
 	if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -218,6 +265,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 {
 	struct xt_target *target = expr->ops->data;
 	unsigned int hook_mask = 0;
+	int ret;
 
 	if (ctx->chain->flags & NFT_BASE_CHAIN) {
 		const struct nft_base_chain *basechain =
@@ -225,11 +273,13 @@ static int nft_target_validate(const struct nft_ctx *ctx,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		hook_mask = 1 << ops->hooknum;
-		if (hook_mask & target->hooks)
-			return 0;
+		if (!(hook_mask & target->hooks))
+			return -EINVAL;
 
-		/* This target is being called from an invalid chain */
-		return -EINVAL;
+		ret = nft_compat_chain_validate_dependency(target->table,
+							   ctx->chain);
+		if (ret < 0)
+			return ret;
 	}
 	return 0;
 }
@@ -324,6 +374,10 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	union nft_entry e = {};
 	int ret;
 
+	ret = nft_compat_chain_validate_dependency(match->name, ctx->chain);
+	if (ret < 0)
+		goto err;
+
 	match_compat_from_user(match, nla_data(tb[NFTA_MATCH_INFO]), info);
 
 	if (ctx->nla[NFTA_RULE_COMPAT]) {
@@ -383,6 +437,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
 {
 	struct xt_match *match = expr->ops->data;
 	unsigned int hook_mask = 0;
+	int ret;
 
 	if (ctx->chain->flags & NFT_BASE_CHAIN) {
 		const struct nft_base_chain *basechain =
@@ -390,11 +445,13 @@ static int nft_match_validate(const struct nft_ctx *ctx,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		hook_mask = 1 << ops->hooknum;
-		if (hook_mask & match->hooks)
-			return 0;
+		if (!(hook_mask & match->hooks))
+			return -EINVAL;
 
-		/* This match is being called from an invalid chain */
-		return -EINVAL;
+		ret = nft_compat_chain_validate_dependency(match->name,
+							   ctx->chain);
+		if (ret < 0)
+			return ret;
 	}
 	return 0;
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 1/7] netfilter: missing module license in the nf_reject_ipvX modules
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

[   23.545204] nf_reject_ipv4: module license 'unspecified' taints kernel.

Fixes: c8d7b98 ("netfilter: move nf_send_resetX() code to nf_reject_ipvX modules")
Reported-by: Dave Young <dyoung@redhat.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/nf_reject_ipv4.c |    3 +++
 net/ipv6/netfilter/nf_reject_ipv6.c |    4 ++++
 2 files changed, 7 insertions(+)

diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index b023b4e..92b303d 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -6,6 +6,7 @@
  * published by the Free Software Foundation.
  */
 
+#include <linux/module.h>
 #include <net/ip.h>
 #include <net/tcp.h>
 #include <net/route.h>
@@ -125,3 +126,5 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
 	kfree_skb(nskb);
 }
 EXPORT_SYMBOL_GPL(nf_send_reset);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 5f5f043..20d9def 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -5,6 +5,8 @@
  * it under the terms of the GNU General Public License version 2 as
  * published by the Free Software Foundation.
  */
+
+#include <linux/module.h>
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #include <net/ip6_fib.h>
@@ -161,3 +163,5 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
 		ip6_local_out(nskb);
 }
 EXPORT_SYMBOL_GPL(nf_send_reset6);
+
+MODULE_LICENSE("GPL");
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 3/7] netfilter: nft_compat: fix hook validation for non-base chains
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1413792639-3954-1-git-send-email-pablo@netfilter.org>

Set hook_mask to zero for non-base chains, otherwise people may hit
bogus errors from the xt_check_target() and xt_check_match() when
validating the uninitialized hook_mask.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_compat.c |    4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 7e2683c..44ae273 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -95,6 +95,8 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		par->hook_mask = 1 << ops->hooknum;
+	} else {
+		par->hook_mask = 0;
 	}
 	par->family	= ctx->afi->family;
 }
@@ -293,6 +295,8 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
 		const struct nf_hook_ops *ops = &basechain->ops[0];
 
 		par->hook_mask = 1 << ops->hooknum;
+	} else {
+		par->hook_mask = 0;
 	}
 	par->family	= ctx->afi->family;
 }
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 0/7] netfilter fixes for net
From: Pablo Neira Ayuso @ 2014-10-20  8:10 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

Hi David,

The following patchset contains netfilter fixes for your net tree,
they are:

1) Fix missing MODULE_LICENSE() in the new nf_reject_ipv{4,6} modules.

2) Restrict nat and masq expressions to the nat chain type. Otherwise,
   users may crash their kernel if they attach a nat/masq rule to a non
   nat chain.

3) Fix hook validation in nft_compat when non-base chains are used.
   Basically, initialize hook_mask to zero.

4) Make sure you use match/targets in nft_compat from the right chain
   type. The existing validation relies on the table name, which can be
   circumvented by using the appropriate table name with a different
   chain type.

5) Better netlink attribute validation in nft_nat. This expression has
   to reject the configuration when no address and proto configurations
   are specified.

6) Interpret NFTA_NAT_REG_*_MAX only if NFTA_NAT_REG_*_MIN is set.
   Yet another sanity check to reject incorrect configurations from
   userspace.

7) Conditional NAT attribute dumping depending on the existing
   configuration.

You can pull these changes from:

  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git

Thanks!

----------------------------------------------------------------

The following changes since commit 01d2d484e49e9bc0ed9b5fdaf345a0e2bf35ffed:

  Merge branch 'bcmgenet_systemport' (2014-10-10 15:39:22 -0400)

are available in the git repository at:


  git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf.git master

for you to fetch changes up to 1e2d56a5d33a7e1fcd21ed3859f52596d02708b0:

  netfilter: nft_nat: dump attributes if they are set (2014-10-18 14:16:13 +0200)

----------------------------------------------------------------
Pablo Neira Ayuso (7):
      netfilter: missing module license in the nf_reject_ipvX modules
      netfilter: nf_tables: restrict nat/masq expressions to nat chain type
      netfilter: nft_compat: fix hook validation for non-base chains
      netfilter: nft_compat: validate chain type in match/target
      netfilter: nft_nat: insufficient attribute validation
      netfilter: nft_nat: NFTA_NAT_REG_ADDR_MAX depends on NFTA_NAT_REG_ADDR_MIN
      netfilter: nft_nat: dump attributes if they are set

 include/net/netfilter/nf_tables.h   |    3 ++
 include/net/netfilter/nft_masq.h    |    3 ++
 net/ipv4/netfilter/nf_reject_ipv4.c |    3 ++
 net/ipv4/netfilter/nft_masq_ipv4.c  |    1 +
 net/ipv6/netfilter/nf_reject_ipv6.c |    4 ++
 net/ipv6/netfilter/nft_masq_ipv6.c  |    1 +
 net/netfilter/nf_tables_api.c       |   14 ++++++
 net/netfilter/nft_compat.c          |   79 ++++++++++++++++++++++++++++----
 net/netfilter/nft_masq.c            |   12 +++++
 net/netfilter/nft_nat.c             |   86 ++++++++++++++++++++++-------------
 10 files changed, 165 insertions(+), 41 deletions(-)

^ permalink raw reply

* [PATCH] drivers: net: xgene: Add missing initialization in xgene_enet_ecc_init()
From: Geert Uytterhoeven @ 2014-10-20  8:08 UTC (permalink / raw)
  To: David S. Miller, Iyappan Subramanian, Keyur Chudgar
  Cc: netdev, linux-kernel, Geert Uytterhoeven

drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c: In function ‘xgene_enet_ecc_init’:
drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c:126: warning: ‘data’ may be used uninitialized in this function

Depending on the arbitrary value on the stack, the loop may terminate
too early, and cause a bogus -ENODEV failure.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index e6d24c2101982444..19e13583b4259cd4 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -123,7 +123,7 @@ static u32 xgene_enet_rd_mac(struct xgene_enet_pdata *p, u32 rd_addr)
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *p)
 {
 	struct net_device *ndev = p->ndev;
-	u32 data;
+	u32 data = 0;
 	int i;
 
 	xgene_enet_wr_diag_csr(p, ENET_CFG_MEM_RAM_SHUTDOWN_ADDR, 0);
-- 
1.9.1

^ permalink raw reply related

* Re: [GIT] Networking
From: Pablo Neira Ayuso @ 2014-10-20  7:53 UTC (permalink / raw)
  To: David Miller; +Cc: torvalds, akpm, netdev, linux-kernel
In-Reply-To: <20141019.210314.1388340101577924200.davem@davemloft.net>

On Sun, Oct 19, 2014 at 09:03:14PM -0400, David Miller wrote:
> From: Linus Torvalds <torvalds@linux-foundation.org>
> Date: Sun, 19 Oct 2014 17:32:15 -0700
> 
> > Looks like the module license issue was just overlooked when moving
> > the code out in commit c8d7b98bec43 ("netfilter: move nf_send_resetX()
> > code to nf_reject_ipvX modules").
> 
> I think Pablo has a patch pending to address this, and indeed he does:
> 
> 	http://marc.info/?l=linux-netdev&m=141293491712312&w=2
> 
> Pablo please push this to me soon, thanks.

I'll send you this batch today. Thanks.

^ permalink raw reply

* Re: [PATCH 2/4] net: make skb_gso_segment error handling more robust
From: Florian Westphal @ 2014-10-20  7:05 UTC (permalink / raw)
  To: David Miller; +Cc: fw, netdev, edumazet
In-Reply-To: <20141019.203943.579204096575757665.davem@davemloft.net>

David Miller <davem@davemloft.net> wrote:
> From: Florian Westphal <fw@strlen.de>
> Date: Sun, 19 Oct 2014 22:42:19 +0200
> 
> > skb_gso_segment has three possible return values:
> > 1. a pointer to the first segmented skb
> > 2. an errno value (IS_ERR())
> > 3. NULL.  This can happen when GSO is used for header verification.
> > 
> > However, several callers currently test IS_ERR instead of IS_ERR_OR_NULL
> > and would oops when NULL is returned.
> > 
> > Note that these call sites should never actually see such a NULL return
> > value; all callers mask out the GSO bits in the feature argument.
> > 
> > However, in the past, there have been issues with some protocol handlers
> > erroneously not respecting the specified feature mask in some cases.
> > 
> > Signed-off-by: Florian Westphal <fw@strlen.de>
> 
> I don't think it makes sense to return PTR_ERR(p) when
> p is NULL.

Good point. Will respin.

^ permalink raw reply

* [PATCH RFC v3 2/3] virtio_net: bql
From: Michael S. Tsirkin @ 2014-10-20  6:52 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: netdev, virtualization
In-Reply-To: <1413787824-16130-1-git-send-email-mst@redhat.com>

Improve tx batching using byte queue limits.
Should be especially effective for MQ.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 14f4cda..b83d39d 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -227,6 +227,7 @@ static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
 	struct virtnet_info *vi = sq->vq->vdev->priv;
 	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
 	unsigned int packets = 0;
+	unsigned int bytes = 0;
 
 	while (packets < budget &&
 	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
@@ -234,6 +235,7 @@ static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
 
 		u64_stats_update_begin(&stats->tx_syncp);
 		stats->tx_bytes += skb->len;
+		bytes += skb->len;
 		stats->tx_packets++;
 		u64_stats_update_end(&stats->tx_syncp);
 
@@ -241,6 +243,8 @@ static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
 		packets++;
 	}
 
+	netdev_tx_completed_queue(txq, packets, bytes);
+
 	if (sq->vq->num_free >= 2+MAX_SKB_FRAGS)
 		netif_tx_start_queue(txq);
 
@@ -959,6 +963,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	int err;
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 	bool kick = !skb->xmit_more;
+	unsigned int bytes = skb->len;
 
 	virtqueue_disable_cb(sq->vq);
 
@@ -976,6 +981,8 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
+	netdev_tx_sent_queue(txq, bytes);
+
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
 	if (sq->vq->num_free < 2+MAX_SKB_FRAGS)
-- 
MST

^ permalink raw reply related

* [PATCH RFC v3 1/3] virtio_net: enable tx interrupt
From: Michael S. Tsirkin @ 2014-10-20  6:52 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: netdev, virtualization
In-Reply-To: <1413787824-16130-1-git-send-email-mst@redhat.com>

On newer hosts that support delayed tx interrupts,
we probably don't have much to gain from orphaning
packets early.

Based on patch by Jason Wang.

Note: this might degrade performance for
hosts without event idx support.
Should be addressed by the next patch.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 133 +++++++++++++++++++++++++++++++----------------
 1 file changed, 89 insertions(+), 44 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 13d0a8b..14f4cda 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -72,6 +72,8 @@ struct send_queue {
 
 	/* Name of the send queue: output.$index */
 	char name[40];
+
+	struct napi_struct napi;
 };
 
 /* Internal representation of a receive virtqueue */
@@ -217,15 +219,41 @@ static struct page *get_a_page(struct receive_queue *rq, gfp_t gfp_mask)
 	return p;
 }
 
+static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
+				       struct send_queue *sq, int budget)
+{
+	struct sk_buff *skb;
+	unsigned int len;
+	struct virtnet_info *vi = sq->vq->vdev->priv;
+	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
+	unsigned int packets = 0;
+
+	while (packets < budget &&
+	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+		pr_debug("Sent skb %p\n", skb);
+
+		u64_stats_update_begin(&stats->tx_syncp);
+		stats->tx_bytes += skb->len;
+		stats->tx_packets++;
+		u64_stats_update_end(&stats->tx_syncp);
+
+		dev_kfree_skb_any(skb);
+		packets++;
+	}
+
+	if (sq->vq->num_free >= 2+MAX_SKB_FRAGS)
+		netif_tx_start_queue(txq);
+
+	return packets;
+}
+
 static void skb_xmit_done(struct virtqueue *vq)
 {
 	struct virtnet_info *vi = vq->vdev->priv;
+	struct send_queue *sq = &vi->sq[vq2txq(vq)];
 
-	/* Suppress further interrupts. */
-	virtqueue_disable_cb(vq);
-
-	/* We were probably waiting for more output buffers. */
-	netif_wake_subqueue(vi->dev, vq2txq(vq));
+	virtqueue_disable_cb(sq->vq);
+	napi_schedule(&sq->napi);
 }
 
 static unsigned int mergeable_ctx_to_buf_truesize(unsigned long mrg_ctx)
@@ -774,6 +802,31 @@ again:
 	return received;
 }
 
+static int virtnet_poll_tx(struct napi_struct *napi, int budget)
+{
+	struct send_queue *sq =
+		container_of(napi, struct send_queue, napi);
+	struct virtnet_info *vi = sq->vq->vdev->priv;
+	struct netdev_queue *txq = netdev_get_tx_queue(vi->dev, vq2txq(sq->vq));
+	unsigned int sent;
+
+	__netif_tx_lock(txq, smp_processor_id());
+	sent = free_old_xmit_skbs(txq, sq, budget);
+	if (sent < budget) {
+		napi_complete(napi);
+		/* Note: we must enable cb *after* napi_complete, because
+		 * napi_schedule calls from callbacks that trigger before
+		 * napi_complete are ignored.
+		 */
+		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+			virtqueue_disable_cb(sq->vq);
+			napi_schedule(&sq->napi);
+		}
+	}
+	__netif_tx_unlock(txq);
+	return sent;
+}
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 /* must be called with local_bh_disable()d */
 static int virtnet_busy_poll(struct napi_struct *napi)
@@ -822,30 +875,12 @@ static int virtnet_open(struct net_device *dev)
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
 		virtnet_napi_enable(&vi->rq[i]);
+		napi_enable(&vi->sq[i].napi);
 	}
 
 	return 0;
 }
 
-static void free_old_xmit_skbs(struct send_queue *sq)
-{
-	struct sk_buff *skb;
-	unsigned int len;
-	struct virtnet_info *vi = sq->vq->vdev->priv;
-	struct virtnet_stats *stats = this_cpu_ptr(vi->stats);
-
-	while ((skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
-		pr_debug("Sent skb %p\n", skb);
-
-		u64_stats_update_begin(&stats->tx_syncp);
-		stats->tx_bytes += skb->len;
-		stats->tx_packets++;
-		u64_stats_update_end(&stats->tx_syncp);
-
-		dev_kfree_skb_any(skb);
-	}
-}
-
 static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 {
 	struct skb_vnet_hdr *hdr;
@@ -911,7 +946,9 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
 		sg_set_buf(sq->sg, hdr, hdr_len);
 		num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
 	}
-	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
+
+	return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb,
+				    GFP_ATOMIC);
 }
 
 static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -923,8 +960,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct netdev_queue *txq = netdev_get_tx_queue(dev, qnum);
 	bool kick = !skb->xmit_more;
 
-	/* Free up any pending old buffers before queueing new ones. */
-	free_old_xmit_skbs(sq);
+	virtqueue_disable_cb(sq->vq);
 
 	/* Try to transmit */
 	err = xmit_skb(sq, skb);
@@ -940,27 +976,26 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_OK;
 	}
 
-	/* Don't wait up for transmitted skbs to be freed. */
-	skb_orphan(skb);
-	nf_reset(skb);
-
 	/* Apparently nice girls don't return TX_BUSY; stop the queue
 	 * before it gets out of hand.  Naturally, this wastes entries. */
-	if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+	if (sq->vq->num_free < 2+MAX_SKB_FRAGS)
 		netif_stop_subqueue(dev, qnum);
-		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
-			/* More just got used, free them then recheck. */
-			free_old_xmit_skbs(sq);
-			if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
-				netif_start_subqueue(dev, qnum);
-				virtqueue_disable_cb(sq->vq);
-			}
-		}
-	}
 
 	if (kick || netif_xmit_stopped(txq))
 		virtqueue_kick(sq->vq);
 
+	/* Try to pop off some buffers before we re-enable callbacks.
+	 * It makes sense to do it after kick, since that causes
+	 * device to process packets.
+	 */
+	if (free_old_xmit_skbs(txq, sq, NAPI_POLL_WEIGHT) < NAPI_POLL_WEIGHT) {
+		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+			virtqueue_disable_cb(sq->vq);
+			napi_schedule(&sq->napi);
+		}
+	} else {
+		napi_schedule(&sq->napi);
+	}
 	return NETDEV_TX_OK;
 }
 
@@ -1137,8 +1172,10 @@ static int virtnet_close(struct net_device *dev)
 	/* Make sure refill_work doesn't re-enable napi! */
 	cancel_delayed_work_sync(&vi->refill);
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		napi_disable(&vi->rq[i].napi);
+		napi_disable(&vi->sq[i].napi);
+	}
 
 	return 0;
 }
@@ -1457,8 +1494,10 @@ static void virtnet_free_queues(struct virtnet_info *vi)
 {
 	int i;
 
-	for (i = 0; i < vi->max_queue_pairs; i++)
+	for (i = 0; i < vi->max_queue_pairs; i++) {
 		netif_napi_del(&vi->rq[i].napi);
+		netif_napi_del(&vi->sq[i].napi);
+	}
 
 	kfree(vi->rq);
 	kfree(vi->sq);
@@ -1612,6 +1651,8 @@ static int virtnet_alloc_queues(struct virtnet_info *vi)
 		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
 			       napi_weight);
 		napi_hash_add(&vi->rq[i].napi);
+		netif_napi_add(vi->dev, &vi->sq[i].napi, virtnet_poll_tx,
+			       napi_weight);
 
 		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
 		ewma_init(&vi->rq[i].mrg_avg_pkt_len, 1, RECEIVE_AVG_WEIGHT);
@@ -1916,8 +1957,10 @@ static int virtnet_freeze(struct virtio_device *vdev)
 	if (netif_running(vi->dev)) {
 		for (i = 0; i < vi->max_queue_pairs; i++) {
 			napi_disable(&vi->rq[i].napi);
+			napi_disable(&vi->sq[i].napi);
 			napi_hash_del(&vi->rq[i].napi);
 			netif_napi_del(&vi->rq[i].napi);
+			netif_napi_del(&vi->sq[i].napi);
 		}
 	}
 
@@ -1942,8 +1985,10 @@ static int virtnet_restore(struct virtio_device *vdev)
 			if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
 				schedule_delayed_work(&vi->refill, 0);
 
-		for (i = 0; i < vi->max_queue_pairs; i++)
+		for (i = 0; i < vi->max_queue_pairs; i++) {
 			virtnet_napi_enable(&vi->rq[i]);
+			napi_enable(&vi->sq[i].napi);
+		}
 	}
 
 	netif_device_attach(vi->dev);
-- 
MST

^ permalink raw reply related

* [PATCH RFC v3 0/3] virtio_net: enabling tx interrupts
From: Michael S. Tsirkin @ 2014-10-20  6:52 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: Jason Wang

RFC patches to enable tx interrupts.
This is to demonstrate how this can be done without
core virtio changes, and to make sure I understand
the new APIs correctly.

Testing TBD, I was asked for a version for early testing.

Applies on top of patch: "virtio_net: fix use after free"
that I recently sent.

Changes from v2:
	clean up code, address issues raised by Jason
Changes from v1:
        address comments by Jason Wang, use delayed cb everywhere
        rebased Jason's patch on top of mine and include it (with some tweaks)

Jason Wang (1):
  virtio-net: optimize free_old_xmit_skbs stats

Michael S. Tsirkin (2):
  virtio_net: enable tx interrupt
  virtio_net: bql

 drivers/net/virtio_net.c | 144 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 101 insertions(+), 43 deletions(-)

-- 
MST

^ permalink raw reply

* [PATCH RFC v3 3/3] virtio-net: optimize free_old_xmit_skbs stats
From: Michael S. Tsirkin @ 2014-10-20  6:52 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: Jason Wang, Rusty Russell, virtualization, netdev
In-Reply-To: <1413787824-16130-1-git-send-email-mst@redhat.com>

From: Jason Wang <jasowang@redhat.com>

We already have counters for sent packets and sent bytes.
Use them to reduce the number of u64_stats_update_begin/end().

Take care not to bother with stats update when called
speculatively.

Based on a patch by Jason Wang.

Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Jason Wang <jasowang@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/net/virtio_net.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b83d39d..c2b69f8 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -233,16 +233,22 @@ static unsigned int free_old_xmit_skbs(struct netdev_queue *txq,
 	       (skb = virtqueue_get_buf(sq->vq, &len)) != NULL) {
 		pr_debug("Sent skb %p\n", skb);
 
-		u64_stats_update_begin(&stats->tx_syncp);
-		stats->tx_bytes += skb->len;
 		bytes += skb->len;
-		stats->tx_packets++;
-		u64_stats_update_end(&stats->tx_syncp);
+		packets++;
 
 		dev_kfree_skb_any(skb);
-		packets++;
 	}
 
+	/* Avoid overhead when no packets have been processed
+	 * happens when called speculatively from start_xmit. */
+	if (!packets)
+		return 0;
+
+	u64_stats_update_begin(&stats->tx_syncp);
+	stats->tx_bytes += bytes;
+	stats->tx_packets += packets;
+	u64_stats_update_end(&stats->tx_syncp);
+
 	netdev_tx_completed_queue(txq, packets, bytes);
 
 	if (sq->vq->num_free >= 2+MAX_SKB_FRAGS)
-- 
MST

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox