Netdev List

Netdev List
 help / color / mirror / Atom feed

* MUTUAL PROJECT
From: jing01lee @ 2013-09-28  6:53 UTC (permalink / raw)
  To: Recipients

Hello

I have a business proposal for you. There is no risks involved.
Pls reply for briefs. 
Mr Lee

^ permalink raw reply

* [PATCH RESEND] iproute2: GRE over IPv6 tunnel support.
From: Dmitry Kozlov @ 2013-09-28  7:32 UTC (permalink / raw)
  To: Hannes Frederic Sowa; +Cc: Templin, Fred L, Stephen Hemminger, netdev

GRE over IPv6 tunnel support.

Signed-off-by: Dmitry Kozlov <xeb@mail.ru>
---
 ip/Makefile    |   3 +-
 ip/ip6tunnel.c | 131 ++++++++++++++++---
 ip/iplink.c    |   7 +-
 ip/link_gre6.c | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 516 insertions(+), 23 deletions(-)
 create mode 100644 ip/link_gre6.c

diff --git a/ip/Makefile b/ip/Makefile
index 48bd4a1..f10d22f 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -5,7 +5,7 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
     iplink_vlan.o link_veth.o link_gre.o iplink_can.o \
     iplink_macvlan.o iplink_macvtap.o ipl2tp.o link_vti.o \
     iplink_vxlan.o tcp_metrics.o iplink_ipoib.o ipnetconf.o link_ip6tnl.o \
-    link_iptnl.o
+    link_iptnl.o link_gre6.o
 
 RTMONOBJ=rtmon.o
 
@@ -23,7 +23,6 @@ all: $(TARGETS) $(SCRIPTS)
 
 ip: $(IPOBJ) $(LIBNETLINK)
 
-
 rtmon: $(RTMONOBJ)
 
 install: all
diff --git a/ip/ip6tunnel.c b/ip/ip6tunnel.c
index 216e982..463be42 100644
--- a/ip/ip6tunnel.c
+++ b/ip/ip6tunnel.c
@@ -48,11 +48,12 @@ static void usage(void) __attribute__((noreturn));
 static void usage(void)
 {
 	fprintf(stderr, "Usage: ip -f inet6 tunnel { add | change | del | show } [ NAME ]\n");
-	fprintf(stderr, "          [ mode { ip6ip6 | ipip6 | any } ]\n");
+	fprintf(stderr, "          [ mode { ip6ip6 | ipip6 | ip6gre | any } ]\n");
 	fprintf(stderr, "          [ remote ADDR local ADDR ] [ dev PHYS_DEV ]\n");
 	fprintf(stderr, "          [ encaplimit ELIM ]\n");
 	fprintf(stderr ,"          [ hoplimit TTL ] [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n");
 	fprintf(stderr, "          [ dscp inherit ]\n");
+	fprintf(stderr, "          [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n");
 	fprintf(stderr, "\n");
 	fprintf(stderr, "Where: NAME      := STRING\n");
 	fprintf(stderr, "       ADDR      := IPV6_ADDRESS\n");
@@ -62,10 +63,11 @@ static void usage(void)
 		DEFAULT_TNL_HOP_LIMIT);
 	fprintf(stderr, "       TCLASS    := { 0x0..0xff | inherit }\n");
 	fprintf(stderr, "       FLOWLABEL := { 0x0..0xfffff | inherit }\n");
+	fprintf(stderr, "       KEY       := { DOTTED_QUAD | NUMBER }\n");
 	exit(-1);
 }
 
-static void print_tunnel(struct ip6_tnl_parm *p)
+static void print_tunnel(struct ip6_tnl_parm2 *p)
 {
 	char remote[64];
 	char local[64];
@@ -104,9 +106,29 @@ static void print_tunnel(struct ip6_tnl_parm *p)
 
 	if (p->flags & IP6_TNL_F_RCV_DSCP_COPY)
 		printf(" dscp inherit");
+
+	if (p->proto == IPPROTO_GRE) {
+		if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
+			printf(" key %u", ntohl(p->i_key));
+		else if ((p->i_flags|p->o_flags)&GRE_KEY) {
+			if (p->i_flags&GRE_KEY)
+				printf(" ikey %u ", ntohl(p->i_key));
+			if (p->o_flags&GRE_KEY)
+				printf(" okey %u ", ntohl(p->o_key));
+		}
+
+		if (p->i_flags&GRE_SEQ)
+			printf("%s  Drop packets out of sequence.\n", _SL_);
+		if (p->i_flags&GRE_CSUM)
+			printf("%s  Checksum in received packet is required.", _SL_);
+		if (p->o_flags&GRE_SEQ)
+			printf("%s  Sequence packets on output.", _SL_);
+		if (p->o_flags&GRE_CSUM)
+			printf("%s  Checksum output packets.", _SL_);
+	}
 }
 
-static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p)
+static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm2 *p)
 {
 	int count = 0;
 	char medium[IFNAMSIZ];
@@ -124,6 +146,9 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p)
 				 strcmp(*argv, "ipip6") == 0 ||
 				 strcmp(*argv, "ip4ip6") == 0)
 				p->proto = IPPROTO_IPIP;
+			else if (strcmp(*argv, "ip6gre") == 0 ||
+				 strcmp(*argv, "gre/ipv6") == 0)
+				p->proto = IPPROTO_GRE;
 			else if (strcmp(*argv, "any/ipv6") == 0 ||
 				 strcmp(*argv, "any") == 0)
 				p->proto = 0;
@@ -202,6 +227,60 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p)
 			if (strcmp(*argv, "inherit") != 0)
 				invarg("not inherit", *argv);
 			p->flags |= IP6_TNL_F_RCV_DSCP_COPY;
+		} else if (strcmp(*argv, "key") == 0) {
+			unsigned uval;
+			NEXT_ARG();
+			p->i_flags |= GRE_KEY;
+			p->o_flags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				p->i_key = p->o_key = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0)<0) {
+					fprintf(stderr, "invalid value of \"key\"\n");
+					exit(-1);
+				}
+				p->i_key = p->o_key = htonl(uval);
+			}
+		} else if (strcmp(*argv, "ikey") == 0) {
+			unsigned uval;
+			NEXT_ARG();
+			p->i_flags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				p->i_key = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0)<0) {
+					fprintf(stderr, "invalid value of \"ikey\"\n");
+					exit(-1);
+				}
+				p->i_key = htonl(uval);
+			}
+		} else if (strcmp(*argv, "okey") == 0) {
+			unsigned uval;
+			NEXT_ARG();
+			p->o_flags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				p->o_key = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0)<0) {
+					fprintf(stderr, "invalid value of \"okey\"\n");
+					exit(-1);
+				}
+				p->o_key = htonl(uval);
+			}
+		} else if (strcmp(*argv, "seq") == 0) {
+			p->i_flags |= GRE_SEQ;
+			p->o_flags |= GRE_SEQ;
+		} else if (strcmp(*argv, "iseq") == 0) {
+			p->i_flags |= GRE_SEQ;
+		} else if (strcmp(*argv, "oseq") == 0) {
+			p->o_flags |= GRE_SEQ;
+		} else if (strcmp(*argv, "csum") == 0) {
+			p->i_flags |= GRE_CSUM;
+			p->o_flags |= GRE_CSUM;
+		} else if (strcmp(*argv, "icsum") == 0) {
+			p->i_flags |= GRE_CSUM;
+		} else if (strcmp(*argv, "ocsum") == 0) {
+			p->o_flags |= GRE_CSUM;
 		} else {
 			if (strcmp(*argv, "name") == 0) {
 				NEXT_ARG();
@@ -212,7 +291,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p)
 				duparg2("name", *argv);
 			strncpy(p->name, *argv, IFNAMSIZ - 1);
 			if (cmd == SIOCCHGTUNNEL && count == 0) {
-				struct ip6_tnl_parm old_p;
+				struct ip6_tnl_parm2 old_p;
 				memset(&old_p, 0, sizeof(old_p));
 				if (tnl_get_ioctl(*argv, &old_p))
 					return -1;
@@ -230,7 +309,7 @@ static int parse_args(int argc, char **argv, int cmd, struct ip6_tnl_parm *p)
 	return 0;
 }
 
-static void ip6_tnl_parm_init(struct ip6_tnl_parm *p, int apply_default)
+static void ip6_tnl_parm_init(struct ip6_tnl_parm2 *p, int apply_default)
 {
 	memset(p, 0, sizeof(*p));
 	p->proto = IPPROTO_IPV6;
@@ -244,8 +323,8 @@ static void ip6_tnl_parm_init(struct ip6_tnl_parm *p, int apply_default)
  * @p1: user specified parameter
  * @p2: database entry
  */
-static int ip6_tnl_parm_match(const struct ip6_tnl_parm *p1,
-			      const struct ip6_tnl_parm *p2)
+static int ip6_tnl_parm_match(const struct ip6_tnl_parm2 *p1,
+			      const struct ip6_tnl_parm2 *p2)
 {
 	return ((!p1->link || p1->link == p2->link) &&
 		(!p1->name[0] || strcmp(p1->name, p2->name) == 0) &&
@@ -263,7 +342,7 @@ static int ip6_tnl_parm_match(const struct ip6_tnl_parm *p1,
 		(!p1->flags || (p1->flags & p2->flags)));
 }
 
-static int do_tunnels_list(struct ip6_tnl_parm *p)
+static int do_tunnels_list(struct ip6_tnl_parm2 *p)
 {
 	char buf[512];
 	int err = -1;
@@ -287,7 +366,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p)
 			rx_fifo, rx_frame,
 			tx_bytes, tx_packets, tx_errs, tx_drops,
 			tx_fifo, tx_colls, tx_carrier, rx_multi;
-		struct ip6_tnl_parm p1;
+		struct ip6_tnl_parm2 p1;
 		char *ptr;
 
 		buf[sizeof(buf) - 1] = '\0';
@@ -312,10 +391,12 @@ static int do_tunnels_list(struct ip6_tnl_parm *p)
 			fprintf(stderr, "Failed to get type of \"%s\"\n", name);
 			continue;
 		}
-		if (type != ARPHRD_TUNNEL6)
+		if (type != ARPHRD_TUNNEL6 && type != ARPHRD_IP6GRE)
 			continue;
 		memset(&p1, 0, sizeof(p1));
 		ip6_tnl_parm_init(&p1, 0);
+		if (type == ARPHRD_IP6GRE)
+			p1.proto = IPPROTO_GRE;
 		strcpy(p1.name, name);
 		p1.link = ll_name_to_index(p1.name);
 		if (p1.link == 0)
@@ -346,7 +427,7 @@ static int do_tunnels_list(struct ip6_tnl_parm *p)
 
 static int do_show(int argc, char **argv)
 {
-        struct ip6_tnl_parm p;
+        struct ip6_tnl_parm2 p;
 
 	ll_init_map(&rth);
 	ip6_tnl_parm_init(&p, 0);
@@ -369,28 +450,44 @@ static int do_show(int argc, char **argv)
 
 static int do_add(int cmd, int argc, char **argv)
 {
-	struct ip6_tnl_parm p;
+	struct ip6_tnl_parm2 p;
 
 	ip6_tnl_parm_init(&p, 1);
 
 	if (parse_args(argc, argv, cmd, &p) < 0)
 		return -1;
 
-	return tnl_add_ioctl(cmd,
-			     cmd == SIOCCHGTUNNEL && p.name[0] ?
-			     p.name : "ip6tnl0", p.name, &p);
+	switch (p.proto) {
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		return tnl_add_ioctl(cmd, "ip6tnl0", p.name, &p);
+	case IPPROTO_GRE:
+		return tnl_add_ioctl(cmd, "ip6gre0", p.name, &p);
+	default:
+		fprintf(stderr, "cannot determine tunnel mode (ip6ip6, ipip6 or gre)\n");
+	}
+	return -1;
 }
 
 static int do_del(int argc, char **argv)
 {
-	struct ip6_tnl_parm p;
+	struct ip6_tnl_parm2 p;
 
 	ip6_tnl_parm_init(&p, 1);
 
 	if (parse_args(argc, argv, SIOCDELTUNNEL, &p) < 0)
 		return -1;
 
-	return tnl_del_ioctl(p.name[0] ? p.name : "ip6tnl0", p.name, &p);
+	switch (p.proto) {
+	case IPPROTO_IPIP:
+	case IPPROTO_IPV6:
+		return tnl_del_ioctl("ip6tnl0", p.name, &p);
+	case IPPROTO_GRE:
+		return tnl_del_ioctl("ip6gre0", p.name, &p);
+	default:
+		return tnl_del_ioctl(p.name, p.name, &p);
+	}
+	return -1;
 }
 
 int do_ip6tunnel(int argc, char **argv)
diff --git a/ip/iplink.c b/ip/iplink.c
index ada9d42..16cb6fe 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -85,7 +85,8 @@ void iplink_usage(void)
 	if (iplink_have_newlink()) {
 		fprintf(stderr, "\n");
 		fprintf(stderr, "TYPE := { vlan | veth | vcan | dummy | ifb | macvlan | can |\n");
-		fprintf(stderr, "          bridge | ipoib | ip6tnl | ipip | sit | vxlan }\n");
+		fprintf(stderr, "          bridge | ipoib | ip6tnl | ipip | sit | vxlan |\n");
+		fprintf(stderr, "          gre | gretap | ip6gre | ip6gretap }\n");
 	}
 	exit(-1);
 }
@@ -243,7 +244,7 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
 			}
 			ivt.vf = vf;
 			addattr_l(&req->n, sizeof(*req), IFLA_VF_TX_RATE, &ivt, sizeof(ivt));
-		
+
 		} else if (matches(*argv, "spoofchk") == 0) {
 			struct ifla_vf_spoofchk ivs;
 			NEXT_ARG();
@@ -286,7 +287,6 @@ static int iplink_parse_vf(int vf, int *argcp, char ***argvp,
 	return 0;
 }
 
-
 int iplink_parse(int argc, char **argv, struct iplink_req *req,
 		char **name, char **type, char **link, char **dev, int *group)
 {
@@ -811,7 +811,6 @@ static int set_address(struct ifreq *ifr, int brd)
 	return 0;
 }
 
-
 static int do_set(int argc, char **argv)
 {
 	char *dev = NULL;
diff --git a/ip/link_gre6.c b/ip/link_gre6.c
new file mode 100644
index 0000000..4c9c536
--- /dev/null
+++ b/ip/link_gre6.c
@@ -0,0 +1,398 @@
+/*
+ * link_gre6.c	gre driver module
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Dmitry Kozlov <xeb@mail.ru>
+ *
+ */
+
+#include <string.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+
+#include <linux/ip.h>
+#include <linux/if_tunnel.h>
+#include <linux/ip6_tunnel.h>
+
+#include "rt_names.h"
+#include "utils.h"
+#include "ip_common.h"
+#include "tunnel.h"
+
+#define IP6_FLOWINFO_TCLASS	htonl(0x0FF00000)
+#define IP6_FLOWINFO_FLOWLABEL	htonl(0x000FFFFF)
+
+#define DEFAULT_TNL_HOP_LIMIT	(64)
+
+static void usage(void) __attribute__((noreturn));
+static void usage(void)
+{
+	fprintf(stderr, "Usage: ip link { add | set | change | replace | del } NAME\n");
+	fprintf(stderr, "          type { ip6gre | ip6gretap } [ remote ADDR ] [ local ADDR ]\n");
+	fprintf(stderr, "          [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n");
+	fprintf(stderr, "          [ hoplimit TTL ] [ encaplimit ELIM ]\n");
+	fprintf(stderr, "          [ tclass TCLASS ] [ flowlabel FLOWLABEL ]\n");
+	fprintf(stderr, "          [ dscp inherit ] [ dev PHYS_DEV ]\n");
+	fprintf(stderr, "\n");
+	fprintf(stderr, "Where: NAME      := STRING\n");
+	fprintf(stderr, "       ADDR      := IPV6_ADDRESS\n");
+	fprintf(stderr, "       TTL       := { 0..255 } (default=%d)\n",
+		DEFAULT_TNL_HOP_LIMIT);
+	fprintf(stderr, "       KEY       := { DOTTED_QUAD | NUMBER }\n");
+	fprintf(stderr, "       ELIM      := { none | 0..255 }(default=%d)\n",
+		IPV6_DEFAULT_TNL_ENCAP_LIMIT);
+	fprintf(stderr, "       TCLASS    := { 0x0..0xff | inherit }\n");
+	fprintf(stderr, "       FLOWLABEL := { 0x0..0xfffff | inherit }\n");
+	exit(-1);
+}
+
+static int gre_parse_opt(struct link_util *lu, int argc, char **argv,
+			 struct nlmsghdr *n)
+{
+	struct {
+		struct nlmsghdr n;
+		struct ifinfomsg i;
+		char buf[1024];
+	} req;
+	struct ifinfomsg *ifi = (struct ifinfomsg *)(n + 1);
+	struct rtattr *tb[IFLA_MAX + 1];
+	struct rtattr *linkinfo[IFLA_INFO_MAX+1];
+	struct rtattr *greinfo[IFLA_GRE_MAX + 1];
+	__u16 iflags = 0;
+	__u16 oflags = 0;
+	unsigned ikey = 0;
+	unsigned okey = 0;
+	struct in6_addr raddr = IN6ADDR_ANY_INIT;
+	struct in6_addr laddr = IN6ADDR_ANY_INIT;
+	unsigned link = 0;
+	unsigned flowinfo = 0;
+	unsigned flags = 0;
+	__u8 hop_limit = DEFAULT_TNL_HOP_LIMIT;
+	__u8 encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
+	int len;
+	/* Turn argv into IFLA_GRE_* attrs on n; without NLM_F_CREATE, start from the link's current values. */
+	if (!(n->nlmsg_flags & NLM_F_CREATE)) {
+		memset(&req, 0, sizeof(req));
+
+		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(*ifi));
+		req.n.nlmsg_flags = NLM_F_REQUEST;
+		req.n.nlmsg_type = RTM_GETLINK;
+		req.i.ifi_family = preferred_family;
+		req.i.ifi_index = ifi->ifi_index;
+
+		if (rtnl_talk(&rth, &req.n, 0, 0, &req.n) < 0) {
+get_failed:	/* shared error exit for the RTM_GETLINK reply checks below */
+			fprintf(stderr,
+				"Failed to get existing tunnel info.\n");
+			return -1;
+		}
+
+		len = req.n.nlmsg_len;
+		len -= NLMSG_LENGTH(sizeof(*ifi));
+		if (len < 0)
+			goto get_failed;
+
+		parse_rtattr(tb, IFLA_MAX, IFLA_RTA(&req.i), len);
+
+		if (!tb[IFLA_LINKINFO])
+			goto get_failed;
+
+		parse_rtattr_nested(linkinfo, IFLA_INFO_MAX, tb[IFLA_LINKINFO]);
+
+		if (!linkinfo[IFLA_INFO_DATA])
+			goto get_failed;
+
+		parse_rtattr_nested(greinfo, IFLA_GRE_MAX,
+				    linkinfo[IFLA_INFO_DATA]);
+
+		if (greinfo[IFLA_GRE_IKEY])
+			ikey = rta_getattr_u32(greinfo[IFLA_GRE_IKEY]);
+
+		if (greinfo[IFLA_GRE_OKEY])
+			okey = rta_getattr_u32(greinfo[IFLA_GRE_OKEY]);
+
+		if (greinfo[IFLA_GRE_IFLAGS])
+			iflags = rta_getattr_u16(greinfo[IFLA_GRE_IFLAGS]);
+
+		if (greinfo[IFLA_GRE_OFLAGS])
+			oflags = rta_getattr_u16(greinfo[IFLA_GRE_OFLAGS]);
+
+		if (greinfo[IFLA_GRE_LOCAL])
+			memcpy(&laddr, RTA_DATA(greinfo[IFLA_GRE_LOCAL]), sizeof(laddr));
+
+		if (greinfo[IFLA_GRE_REMOTE])
+			memcpy(&raddr, RTA_DATA(greinfo[IFLA_GRE_REMOTE]), sizeof(raddr));
+
+		if (greinfo[IFLA_GRE_TTL])
+			hop_limit = rta_getattr_u8(greinfo[IFLA_GRE_TTL]);
+
+		if (greinfo[IFLA_GRE_LINK])
+			link = rta_getattr_u32(greinfo[IFLA_GRE_LINK]);
+
+		if (greinfo[IFLA_GRE_ENCAP_LIMIT])
+			encap_limit = rta_getattr_u8(greinfo[IFLA_GRE_ENCAP_LIMIT]);
+
+		if (greinfo[IFLA_GRE_FLOWINFO])
+			flowinfo = rta_getattr_u32(greinfo[IFLA_GRE_FLOWINFO]);
+
+		if (greinfo[IFLA_GRE_FLAGS])
+			flags = rta_getattr_u32(greinfo[IFLA_GRE_FLAGS]);
+	}
+
+	while (argc > 0) {
+		if (!matches(*argv, "key")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			iflags |= GRE_KEY;
+			oflags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0) < 0) {
+					fprintf(stderr,
+						"Invalid value for \"key\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+
+			ikey = okey = uval;
+		} else if (!matches(*argv, "ikey")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			iflags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0)<0) {
+					fprintf(stderr, "invalid value of \"ikey\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+			ikey = uval;
+		} else if (!matches(*argv, "okey")) {
+			unsigned uval;
+
+			NEXT_ARG();
+			oflags |= GRE_KEY;
+			if (strchr(*argv, '.'))
+				uval = get_addr32(*argv);
+			else {
+				if (get_unsigned(&uval, *argv, 0)<0) {
+					fprintf(stderr, "invalid value of \"okey\"\n");
+					exit(-1);
+				}
+				uval = htonl(uval);
+			}
+			okey = uval;
+		} else if (!matches(*argv, "seq")) {
+			iflags |= GRE_SEQ;
+			oflags |= GRE_SEQ;
+		} else if (!matches(*argv, "iseq")) {
+			iflags |= GRE_SEQ;
+		} else if (!matches(*argv, "oseq")) {
+			oflags |= GRE_SEQ;
+		} else if (!matches(*argv, "csum")) {
+			iflags |= GRE_CSUM;
+			oflags |= GRE_CSUM;
+		} else if (!matches(*argv, "icsum")) {
+			iflags |= GRE_CSUM;
+		} else if (!matches(*argv, "ocsum")) {
+			oflags |= GRE_CSUM;
+		} else if (!matches(*argv, "remote")) {
+			inet_prefix addr;
+			NEXT_ARG();
+			get_prefix(&addr, *argv, preferred_family);
+			if (addr.family == AF_UNSPEC)
+				invarg("\"remote\" address family is AF_UNSPEC", *argv);
+			memcpy(&raddr, &addr.data, sizeof(raddr));
+		} else if (!matches(*argv, "local")) {
+			inet_prefix addr;
+			NEXT_ARG();
+			get_prefix(&addr, *argv, preferred_family);
+			if (addr.family == AF_UNSPEC)
+				invarg("\"local\" address family is AF_UNSPEC", *argv);
+			memcpy(&laddr, &addr.data, sizeof(laddr));
+		} else if (!matches(*argv, "dev")) {
+			NEXT_ARG();
+			link = if_nametoindex(*argv);
+			if (link == 0)
+				exit(-1);
+		} else if (!matches(*argv, "ttl") ||
+			   !matches(*argv, "hoplimit")) {
+			__u8 uval;
+			NEXT_ARG();
+			if (get_u8(&uval, *argv, 0))
+				invarg("invalid TTL", *argv);
+			hop_limit = uval;
+		} else if (!matches(*argv, "tos") ||
+			   !matches(*argv, "tclass") ||
+			   !matches(*argv, "dsfield")) {
+			__u8 uval;
+			NEXT_ARG();
+			if (strcmp(*argv, "inherit") == 0)
+				flags |= IP6_TNL_F_USE_ORIG_TCLASS;
+			else {	/* an explicit value replaces any tclass bits already in flowinfo */
+				if (get_u8(&uval, *argv, 16))
+					invarg("invalid TClass", *argv);
+				flowinfo = (flowinfo & ~IP6_FLOWINFO_TCLASS) | (htonl((__u32)uval << 20) & IP6_FLOWINFO_TCLASS);
+				flags &= ~IP6_TNL_F_USE_ORIG_TCLASS;
+			}
+		} else if (strcmp(*argv, "flowlabel") == 0 ||
+			   strcmp(*argv, "fl") == 0) {
+			__u32 uval;
+			NEXT_ARG();
+			if (strcmp(*argv, "inherit") == 0)
+				flags |= IP6_TNL_F_USE_ORIG_FLOWLABEL;
+			else {	/* an explicit value replaces any flowlabel bits already in flowinfo */
+				if (get_u32(&uval, *argv, 16))
+					invarg("invalid Flowlabel", *argv);
+				if (uval > 0xFFFFF)
+					invarg("invalid Flowlabel", *argv);
+				flowinfo = (flowinfo & ~IP6_FLOWINFO_FLOWLABEL) | (htonl(uval) & IP6_FLOWINFO_FLOWLABEL);
+				flags &= ~IP6_TNL_F_USE_ORIG_FLOWLABEL;
+			}
+		} else if (strcmp(*argv, "dscp") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "inherit") != 0)
+				invarg("not inherit", *argv);
+			flags |= IP6_TNL_F_RCV_DSCP_COPY;
+		} else
+			usage();
+		argc--; argv++;
+	}
+
+	addattr32(n, 1024, IFLA_GRE_IKEY, ikey);
+	addattr32(n, 1024, IFLA_GRE_OKEY, okey);
+	addattr_l(n, 1024, IFLA_GRE_IFLAGS, &iflags, 2);
+	addattr_l(n, 1024, IFLA_GRE_OFLAGS, &oflags, 2);
+	addattr_l(n, 1024, IFLA_GRE_LOCAL, &laddr, sizeof(laddr));
+	addattr_l(n, 1024, IFLA_GRE_REMOTE, &raddr, sizeof(raddr));
+	if (link)
+		addattr32(n, 1024, IFLA_GRE_LINK, link);
+	addattr_l(n, 1024, IFLA_GRE_TTL, &hop_limit, 1);
+	addattr_l(n, 1024, IFLA_GRE_ENCAP_LIMIT, &encap_limit, 1);
+	addattr_l(n, 1024, IFLA_GRE_FLOWINFO, &flowinfo, 4);
+	addattr_l(n, 1024, IFLA_GRE_FLAGS, &flags, 4);	/* IP6_TNL_F_* bits collected above */
+
+	return 0;
+}
+
+static void gre_print_opt(struct link_util *lu, FILE *f, struct rtattr *tb[])
+{
+	char s1[1024];
+	char s2[64];
+	const char *local = "any";
+	const char *remote = "any";
+	unsigned iflags = 0;
+	unsigned oflags = 0;
+	unsigned flags = 0;
+	unsigned flowinfo = 0;
+	struct in6_addr in6_addr_any = IN6ADDR_ANY_INIT;
+	/* Print the IFLA_GRE_* attributes found in tb[] to f as option keywords and values. */
+	if (!tb)
+		return;
+
+	if (tb[IFLA_GRE_FLAGS])
+		flags = rta_getattr_u32(tb[IFLA_GRE_FLAGS]);
+
+	if (tb[IFLA_GRE_FLOWINFO])
+		flowinfo = rta_getattr_u32(tb[IFLA_GRE_FLOWINFO]);	/* kept separate from flags; used for the flowlabel below */
+
+	if (tb[IFLA_GRE_REMOTE]) {
+		struct in6_addr addr;
+		memcpy(&addr, RTA_DATA(tb[IFLA_GRE_REMOTE]), sizeof(addr));
+
+		if (memcmp(&addr, &in6_addr_any, sizeof(addr)))
+			remote = format_host(AF_INET6, sizeof(addr), &addr, s1, sizeof(s1));
+	}
+
+	fprintf(f, "remote %s ", remote);	/* printed before s1 is reused for the local address */
+
+	if (tb[IFLA_GRE_LOCAL]) {
+		struct in6_addr addr;
+		memcpy(&addr, RTA_DATA(tb[IFLA_GRE_LOCAL]), sizeof(addr));
+
+		if (memcmp(&addr, &in6_addr_any, sizeof(addr)))
+			local = format_host(AF_INET6, sizeof(addr), &addr, s1, sizeof(s1));
+	}
+
+	fprintf(f, "local %s ", local);
+
+	if (tb[IFLA_GRE_LINK] && rta_getattr_u32(tb[IFLA_GRE_LINK])) {
+		unsigned link = rta_getattr_u32(tb[IFLA_GRE_LINK]);
+		const char *n = if_indextoname(link, s2);
+
+		if (n)
+			fprintf(f, "dev %s ", n);
+		else
+			fprintf(f, "dev %u ", link);
+	}
+
+	if (tb[IFLA_GRE_TTL] && rta_getattr_u8(tb[IFLA_GRE_TTL]))
+		fprintf(f, "hoplimit %d ", rta_getattr_u8(tb[IFLA_GRE_TTL]));
+
+	if (flags & IP6_TNL_F_IGN_ENCAP_LIMIT)
+		fprintf(f, "encaplimit none ");
+	else if (tb[IFLA_GRE_ENCAP_LIMIT]) {
+		int encap_limit = rta_getattr_u8(tb[IFLA_GRE_ENCAP_LIMIT]);
+
+		fprintf(f, "encaplimit %d ", encap_limit);
+	}
+
+	if (flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+		fprintf(f, "flowlabel inherit ");
+	else
+		fprintf(f, "flowlabel 0x%05x ", ntohl(flowinfo & IP6_FLOWINFO_FLOWLABEL));
+
+	if (flags & IP6_TNL_F_RCV_DSCP_COPY)
+		fprintf(f, "dscp inherit ");
+
+	if (tb[IFLA_GRE_IFLAGS])
+		iflags = rta_getattr_u16(tb[IFLA_GRE_IFLAGS]);
+
+	if (tb[IFLA_GRE_OFLAGS])
+		oflags = rta_getattr_u16(tb[IFLA_GRE_OFLAGS]);
+
+	if ((iflags & GRE_KEY) && tb[IFLA_GRE_IKEY]) {
+		inet_ntop(AF_INET, RTA_DATA(tb[IFLA_GRE_IKEY]), s2, sizeof(s2));	/* key shown in dotted-quad form */
+		fprintf(f, "ikey %s ", s2);
+	}
+
+	if ((oflags & GRE_KEY) && tb[IFLA_GRE_OKEY]) {
+		inet_ntop(AF_INET, RTA_DATA(tb[IFLA_GRE_OKEY]), s2, sizeof(s2));
+		fprintf(f, "okey %s ", s2);
+	}
+
+	if (iflags & GRE_SEQ)
+		fputs("iseq ", f);
+	if (oflags & GRE_SEQ)
+		fputs("oseq ", f);
+	if (iflags & GRE_CSUM)
+		fputs("icsum ", f);
+	if (oflags & GRE_CSUM)
+		fputs("ocsum ", f);
+}
+
+struct link_util ip6gre_link_util = {
+	.id = "ip6gre",
+	.maxattr = IFLA_GRE_MAX,
+	.parse_opt = gre_parse_opt,
+	.print_opt = gre_print_opt,
+};
+
+struct link_util ip6gretap_link_util = {
+	.id = "ip6gretap",
+	.maxattr = IFLA_GRE_MAX,
+	.parse_opt = gre_parse_opt,
+	.print_opt = gre_print_opt,
+};
-- 
1.8.3.2

^ permalink raw reply related

* [PATCH v2 1/2] powerpc: net: filter: fix DIVWU instruction opcode
From: Vladimir Murzin @ 2013-09-28  8:22 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: netdev, davem, benh, paulus, matt, edumazet, dborkman,
	Vladimir Murzin

Currently DIVWU stands for *signed* divw opcode:

7d 2a 4b 96 	divwu   r9,r10,r9
7d 2a 4b d6 	divw    r9,r10,r9

Use the *unsigned* divw opcode for DIVWU.

Suggested-by: Vassili Karpov <av1474@comtv.ru>
Reviewed-by: Vassili Karpov <av1474@comtv.ru>
Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
Acked-by: Matt Evans <matt@ozlabs.org>
---
Changelog

v1->v2

Added credit to Vassili Karpov (malc) who kindly reviewed generated assembly
[1] and highlighted usage of signed division.
Note: temporarily, for technical reasons, he is not able to receive email.

[1]http://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg71635.html

 arch/powerpc/include/asm/ppc-opcode.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index eccfc16..0a4a683 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -171,7 +171,7 @@
 #define PPC_INST_MULLW			0x7c0001d6
 #define PPC_INST_MULHWU			0x7c000016
 #define PPC_INST_MULLI			0x1c000000
-#define PPC_INST_DIVWU			0x7c0003d6
+#define PPC_INST_DIVWU			0x7c000396
 #define PPC_INST_RLWINM			0x54000000
 #define PPC_INST_RLDICR			0x78000004
 #define PPC_INST_SLW			0x7c000030
-- 
1.8.1.5

^ permalink raw reply related

* [PATCH v2 2/2] ppc: bpf_jit: support MOD operation
From: Vladimir Murzin @ 2013-09-28  8:22 UTC (permalink / raw)
  To: linuxppc-dev
  Cc: netdev, davem, benh, paulus, matt, edumazet, dborkman,
	Vladimir Murzin
In-Reply-To: <1380356521-3432-1-git-send-email-murzin.v@gmail.com>

commit b6069a9570 (filter: add MOD operation) added generic
support for modulus operation in BPF.

This patch brings JIT support for PPC64

Signed-off-by: Vladimir Murzin <murzin.v@gmail.com>
Acked-by: Matt Evans <matt@ozlabs.org>
---
Changelog

v1->v2

Definition for r_scratch2 was moved to header file.

 arch/powerpc/net/bpf_jit.h      |  1 +
 arch/powerpc/net/bpf_jit_comp.c | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 8a5dfaf..42a115a 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -39,6 +39,7 @@
 #define r_X		5
 #define r_addr		6
 #define r_scratch1	7
+#define r_scratch2	8
 #define r_D		14
 #define r_HL		15
 #define r_M		16
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index bf56e33..cbb2702 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -193,6 +193,26 @@ static int bpf_jit_build_body(struct sk_filter *fp, u32 *image,
 				PPC_MUL(r_A, r_A, r_scratch1);
 			}
 			break;
+		case BPF_S_ALU_MOD_X: /* A %= X; */
+			ctx->seen |= SEEN_XREG;
+			PPC_CMPWI(r_X, 0);
+			if (ctx->pc_ret0 != -1) {
+				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
+			} else {
+				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
+				PPC_LI(r_ret, 0);
+				PPC_JMP(exit_addr);
+			}
+			PPC_DIVWU(r_scratch1, r_A, r_X);
+			PPC_MUL(r_scratch1, r_X, r_scratch1);
+			PPC_SUB(r_A, r_A, r_scratch1);
+			break;
+		case BPF_S_ALU_MOD_K: /* A %= K; */
+			PPC_LI32(r_scratch2, K);
+			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
+			PPC_MUL(r_scratch1, r_scratch2, r_scratch1);
+			PPC_SUB(r_A, r_A, r_scratch1);
+			break;
 		case BPF_S_ALU_DIV_X: /* A /= X; */
 			ctx->seen |= SEEN_XREG;
 			PPC_CMPWI(r_X, 0);
-- 
1.8.1.5

^ permalink raw reply related

* Exclusive offer, feel it for real
From: BarrettDavis @ 2013-09-28 11:08 UTC (permalink / raw)


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=us-ascii, Size: 300 bytes --]


		Dear Customer 

Make your first deposit and we will triple your bonus money up to £600.

http://translate.googleusercontent.com/translate_c?depth=1&hl=auto&sl=fr&url=www.google.com.bd&u=http://onlinecasino27.yolasite.com/casino1&usg=ALkJrhjsEp2wEy6D4p3zB8y4lFpDcS7xmg

Best Regards, 
		

^ permalink raw reply

* Re: [PATCH] iproute2: xfrm state add abort issue
From: Sohny Thomas @ 2013-09-28 13:24 UTC (permalink / raw)
  To: David Laight; +Cc: stephen, netdev
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B7361@saturn3.aculab.com>

On Friday 27 September 2013 01:56 PM, David Laight wrote:
>> ip xfrm state add causes a SIGABRT due to a strncpy_chk .
>> This happens since strncpy doesn't account for the '\0' .
>> I have fixed this using sizeof  instead of strlen .
>>
>> There is a redhat bug which documents this issue
>>
>> https://bugzilla.redhat.com/show_bug.cgi?id=982761
>>
>> Signed-off-by: Sohny Thomas <sohthoma@in.ibm.com>
>>
>> --------------
>>
>> diff --git a/ip/xfrm_state.c b/ip/xfrm_state.c
>> index 389942c..7dd8799 100644
>> --- a/ip/xfrm_state.c
>> +++ b/ip/xfrm_state.c
>> @@ -117,7 +117,7 @@ static int xfrm_algo_parse(struct xfrm_algo *alg,
>> enum xfrm_attr_type_t type,
>>                               char *name, char *key, char *buf, int max)
>>     {
>>            int len;
>> -       int slen = strlen(key);
>> +       int slen = sizeof(key);
>
> you definitely don't want sizeof(key) - that is either 4 or 8.
oh damn my bad.
I think i will go with strlen(key) + 1.

or i will pass slen+1 to strncpy .

Regards,
Sohny
>
> 	David
>
>
>
>

^ permalink raw reply

* Re: [PATCH v1 net-next] net: pkt_sched: PIE AQM scheme
From: Eric Dumazet @ 2013-09-28 17:03 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev, davem, shemminger, Mythili Prabhu, Dave Taht
In-Reply-To: <1380333383-9507-1-git-send-email-subramanian.vijay@gmail.com>

On Fri, 2013-09-27 at 18:56 -0700, Vijay Subramanian wrote:

> +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> +{
> +	struct pie_sched_data *q = qdisc_priv(sch);
> +
> +	if (unlikely(qdisc_qlen(sch) >= sch->limit))
> +		goto out;
> +

...

> +out:
> +	q->stats.overlimit++;
> +	qdisc_drop(skb, sch);
> +	return NET_XMIT_CN;  /*indicate congestion*/
> +}


If a Qdisc drops a packet because sch->limit is hit, you must :

return qdisc_drop(skb, sch);

So that NET_XMIT_DROP is returned, not NET_XMIT_CN.

This packet was dropped for sure.

vi +96 net/sched/sch_api.c

   ---enqueue

   enqueue returns 0, if packet was enqueued successfully.
   If packet (this one or another one) was dropped, it returns
   not zero error code.
   NET_XMIT_DROP        - this packet dropped
     Expected action: do not backoff, but wait until queue will clear.
   NET_XMIT_CN          - probably this packet enqueued, but another one dropped.
     Expected action: backoff or ignore
   NET_XMIT_POLICED     - dropped by police.
     Expected action: backoff or error to real-time apps.

^ permalink raw reply

* Re: [PATCH v1 net-next] net: pkt_sched: PIE AQM scheme
From: Stephen Hemminger @ 2013-09-28 17:06 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev
In-Reply-To: <1902752B0C92F943AB7EA9EE13E2DEEC1272C74DD1@HQ1-EXCH02.corp.brocade.com>

Thanks for submitting this, it is in ok shape, but I still
see several issues.

> diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
> index f2624b5..2fb6e6d 100644
> --- a/include/uapi/linux/pkt_sched.h
> +++ b/include/uapi/linux/pkt_sched.h
> @@ -787,4 +787,30 @@ struct tc_fq_qd_stats {
>         __u32   throttled_flows;
>         __u32   pad;
>  };
> +
> +/*PIE*/
> +enum {
> +       TCA_PIE_UNSPEC,
> +       TCA_PIE_TARGET,
> +       TCA_PIE_LIMIT,
> +       TCA_PIE_TUPDATE,
> +       TCA_PIE_ALPHA,
> +       TCA_PIE_BETA,
> +       TCA_PIE_ECN,
> +       TCA_PIE_BYTEMODE,
> +       __TCA_PIE_MAX
> +};
> +#define TCA_PIE_MAX   (__TCA_PIE_MAX - 1)
> +
> +struct tc_pie_xstats {
> +       __u32 prob;             /* current probability */
> +       __u32 delay;            /* current delay in ms */
> +       __u32 avg_dq_rate;      /* current average dq_rate in bits/pie_time */
> +       __u32 packets_in;       /* total number of packets enqueued */
> +       __u32 dropped;          /* packets dropped due to pie_action */
> +       __u32 overlimit;        /* dropped due to lack of space in queue */
> +       __u32 maxq;             /* maximum queue size */
> +       __u32 ecn_mark;         /* packets marked with ecn*/
> +};
> +
>  #endif
> diff --git a/net/sched/Kconfig b/net/sched/Kconfig
> index c03a32a..7b32e58 100644
> --- a/net/sched/Kconfig
> +++ b/net/sched/Kconfig
> @@ -286,6 +286,17 @@ config NET_SCH_FQ
> 
>           If unsure, say N.
> 
> +config NET_SCH_PIE
> +        tristate "Proportianal Enhanced Controller AQM (PIE)"

Spelling should be: Proportional 


> diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
> new file mode 100644
> index 0000000..cfcfde9
> --- /dev/null
> +++ b/net/sched/sch_pie.c
> @@ -0,0 +1,623 @@
> +/* Copyright (C) 2013 Cisco Systems, Inc, 2013.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version 2
> + * of the License.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
> + * USA.
> + *
> + * Author: Vijay Subramanian <vijaynsu@cisco.com>
> + * Author: Mythili Prabhu <mysuryan@cisco.com>
> + *
> + * ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
> + * University of Oslo, Norway.
> + */
> +
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <linux/types.h>
> +#include <linux/kernel.h>
> +#include <linux/errno.h>
> +#include <linux/skbuff.h>
> +#include <net/pkt_sched.h>
> +#include <net/inet_ecn.h>
> +
> +#define PIE_DEFAULT_QUEUE_LIMIT 200    /* in packets */
> +#define QUEUE_THRESHOLD (5000)
Useless parentheses here

> +#define DQCOUNT_INVALID -1
> +#define THRESHOLD_PKT_SIZE     1500
> +#define MAX_INT_VALUE  0xffffffff
> +#define MAX_INT_VALUE_CAP  (0xffffffff >> 8)
> +
> +typedef u32 pie_time_t;
> +typedef s32 pie_tdiff_t;
> +#define PIE_SHIFT 10
> +#define MS2PIETIME(a) ((a * NSEC_PER_MSEC) >> PIE_SHIFT)
> +#define PIE_TIME_PER_SEC  ((NSEC_PER_SEC >> PIE_SHIFT))
> +

I would prefer that all packet schedulers use the same set of clock
routines (psched), rather than each inventing its own wrapper for the
high-resolution clock.

> +static inline pie_time_t pie_get_time(void)
> +{
> +       u64 ns = ktime_to_ns(ktime_get());
> +       return ns >> PIE_SHIFT;
> +}
> +
> +static inline u32 pie_time_to_ms(pie_time_t val)
> +{
> +       u64 valms = ((u64) val << PIE_SHIFT);
> +
> +       do_div(valms, NSEC_PER_MSEC);
> +       return (u32) valms;
> +}

Psched has all this.

> +/* parameters used*/
> +struct pie_params {
> +       pie_time_t target;      /* user specified target delay in pietime*/
> +       pie_time_t tupdate;     /* frequency with which the timer fires*/
                                                                         ^ space before end of comment
> +       u32 limit;              /* number of packets that can be enqueued */
> +       u32 alpha;              /* alpha and beta are between -4 and 4 */
> +       u32 beta;               /* and are used for shift relative to 1 */
> +       bool ecn;               /* true if ecn is enabled */
> +       bool bytemode;          /* to scale drop early prob based on pkt size */
> +};
> +
> +/* variables used*/
> +struct pie_vars {
> +       u32 prob;               /* probability but scaled by u32 limit. */
> +       pie_time_t burst_time;
> +       pie_time_t qdelay;
> +       pie_time_t qdelay_old;
> +       u32 dq_count;           /* measured in bytes */
> +       pie_time_t dq_tstamp;   /* drain rate */
> +       u32 avg_dq_rate;        /* bytes per pietime tick, scaled by 8 */
> +       u32 qlen_old;           /* in bytes */
> +};
> +
> +struct pie_stats {
> +       u32 packets_in;         /* total number of packets enqueued */
> +       u32 dropped;            /* packets dropped due to pie_action */
> +       u32 overlimit;          /* dropped due to lack of space in queue */
> +       u32 maxq;               /* maximum queue size */
> +       u32 ecn_mark;           /* packets marked with ECN */
> +};
> +
> +static void pie_params_init(struct pie_params *params)
> +{
> +       memset(params, 0, sizeof(*params));

Unnecessary, already zero'd when created by qdisc_alloc()
Call chain
      qdisc_create
	      qdisc_alloc
              ops->init => pie_init
                 pie_params_init
      
> +       params->alpha = 2;
> +       params->beta = 20;
> +       params->tupdate = MS2PIETIME(30);       /* 30 ms */
> +       params->limit = PIE_DEFAULT_QUEUE_LIMIT;
> +       params->target = MS2PIETIME(20);        /* 20 ms */
> +       params->ecn = false;
> +       params->bytemode = false;
> +}


> +
> +static void pie_vars_init(struct pie_vars *vars)
> +{
> +       memset(vars, 0, sizeof(*vars));
ditto, already zero'd

> +       vars->dq_count = DQCOUNT_INVALID;
> +       vars->avg_dq_rate = 0;
> +       /* default of 100 ms in pietime  */
> +       vars->burst_time = MS2PIETIME(100);
> +}
> +
> +static void pie_stats_init(struct pie_stats *stats)
> +{
> +       memset(stats, 0, sizeof(*stats));
> +}
> +
ditto, already zero'd

> +struct pie_sched_data {
> +       struct pie_params params;
> +       struct pie_vars vars;
> +       struct pie_stats stats;
> +       struct timer_list adapt_timer;
> +};

Standard practice is to put data structures before code.

> +
> +static inline bool drop_early(struct Qdisc *sch, u32 packet_size)

Inline is not needed if static. Let compiler decide.

> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       u32 rnd;
> +       u32 local_prob = q->vars.prob;
> +
> +
> +       /* If there is still burst allowance left or delay is much below target
> +        * not due to heavy dropping, skip random early drop
> +        */
> +       if (q->vars.burst_time > 0)
> +               return false;
> +
> +       /* If current delay is less than half of target, and
> +        * if drop prob is low already, disable early_drop
> +        */
> +       if ((q->vars.qdelay < q->params.target / 2)
> +           && (q->vars.prob < MAX_INT_VALUE / 5))
> +               return false;
> +
> +       /* If we have fewer than 2 packets, disable drop_early,
> +        * similar to min_th in RED
> +        */
> +       if (sch->qstats.backlog < 2 * 1500)
> +               return false;
> +
> +       /* If bytemode is turned on, use packet size to compute new
> +        * probablity. Smaller packets will have lower drop prob in this case
> +        */
> +       if (q->params.bytemode) {
> +               /* If packet_size is greater than THRESHOLD_PKT_SIZE,
> +                * we cap the probability to the maximum value
> +                */
> +               if (packet_size <= THRESHOLD_PKT_SIZE) {
> +                       local_prob =
> +                           (local_prob / THRESHOLD_PKT_SIZE) * packet_size;
> +               } else {
> +                       local_prob = MAX_INT_VALUE_CAP;
> +               }
> +
> +       } else {
> +               local_prob = q->vars.prob;
> +       }
> +
> +       rnd = net_random() % MAX_INT_VALUE_CAP;
> +       if (rnd < local_prob)
> +               return true;
> +
> +       return false;
> +}
> +
> +static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +
> +       if (unlikely(qdisc_qlen(sch) >= sch->limit))
> +               goto out;
> +
> +       if (!drop_early(sch, skb->len)) {
> +               /* we can enqueue the packet */
> +               q->stats.packets_in++;
> +
> +               if (qdisc_qlen(sch) > q->stats.maxq)
> +                       q->stats.maxq = qdisc_qlen(sch);
> +
> +               return qdisc_enqueue_tail(skb, sch);
> +       } else if (q->params.ecn && INET_ECN_set_ce(skb) &&
> +                  (q->vars.prob <= MAX_INT_VALUE / 10)) {
> +                       /* If packet is ecn capable, mark it if drop probability
> +                        * is lower than 10%, else drop it.
> +                        */
> +                       q->stats.ecn_mark++;
> +                       return qdisc_enqueue_tail(skb, sch);
> +       }
> +out:
> +       q->stats.overlimit++;
> +       qdisc_drop(skb, sch);
> +       return NET_XMIT_CN;  /*indicate congestion*/
> +}
> +
> +static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
> +       [TCA_PIE_TARGET] = {.type = NLA_U32},
                                             ^ space before }
> +       [TCA_PIE_LIMIT] = {.type = NLA_U32},
> +       [TCA_PIE_TUPDATE] = {.type = NLA_U32},
> +       [TCA_PIE_ALPHA] = {.type = NLA_U32},
> +       [TCA_PIE_BETA] = {.type = NLA_U32},
> +       [TCA_PIE_ECN] = {.type = NLA_U32},
> +       [TCA_PIE_BYTEMODE] = {.type = NLA_U32},
              Looks prettier if all = are aligned

> +};
> +
> +static int pie_change(struct Qdisc *sch, struct nlattr *opt)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       struct nlattr *tb[TCA_PIE_MAX + 1];
> +       unsigned int qlen;
> +       int err;
> +
> +       if (!opt)
> +               return -EINVAL;
> +
> +       err = nla_parse_nested(tb, TCA_PIE_MAX, opt, pie_policy);
> +       if (err < 0)
> +               return err;
> +
> +       sch_tree_lock(sch);
> +
> +       /* convert from microseconds to pietime */
> +       if (tb[TCA_PIE_TARGET]) {
> +               /* target is in us */
> +               u32 target = nla_get_u32(tb[TCA_PIE_TARGET]);
> +               /* convert to pietime */
> +               q->params.target = ((u64) target * NSEC_PER_USEC) >> PIE_SHIFT;
> +       }
> +
> +       if (tb[TCA_PIE_TUPDATE]) {
> +               /* tupdate is in us */
> +               u32 tupdate = nla_get_u32(tb[TCA_PIE_TUPDATE]);
> +               /* convert to pietime */
> +               q->params.tupdate =
> +                   ((u64) tupdate * NSEC_PER_USEC) >> PIE_SHIFT;
> +       }
> +
> +       if (tb[TCA_PIE_LIMIT]) {
> +               u32 limit = nla_get_u32(tb[TCA_PIE_LIMIT]);
> +               q->params.limit = limit;
> +               sch->limit = limit;
> +       }
> +
> +       if (tb[TCA_PIE_ALPHA])
> +               q->params.alpha = nla_get_u32(tb[TCA_PIE_ALPHA]);
> +
> +       if (tb[TCA_PIE_BETA])
> +               q->params.beta = nla_get_u32(tb[TCA_PIE_BETA]);
> +
> +       if (tb[TCA_PIE_ECN])
> +               q->params.ecn = nla_get_u32(tb[TCA_PIE_ECN]);
> +
> +       if (tb[TCA_PIE_BYTEMODE])
> +               q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
> +
> +       /* Drop excess packets if new limit is lower */
> +       qlen = sch->q.qlen;
> +       while (sch->q.qlen > sch->limit) {
> +               struct sk_buff *skb = __skb_dequeue(&sch->q);
> +
> +               sch->qstats.backlog -= qdisc_pkt_len(skb);
> +               qdisc_drop(skb, sch);
> +       }
> +       qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
> +
> +       sch_tree_unlock(sch);
> +       return 0;
> +}
> +
> +static int pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       int qlen = sch->qstats.backlog; /* current queue size in bytes */
> +
> +       /* If current queue is about 10 packets or more and dq_count is unset
> +        *  we have enough packets to calculate the drain rate. Save
> +        *  current time as dq_tstamp and start measurement cycle.
> +        */
> +
> +       if (qlen >= QUEUE_THRESHOLD && q->vars.dq_count == DQCOUNT_INVALID) {
> +               q->vars.dq_tstamp = pie_get_time();
> +               q->vars.dq_count = 0;
> +       }
> +
> +       /*  Calculate the average drain rate from this value.  If queue length
> +        *  has receded to a small value viz., <= QUEUE_THRESHOLD bytes,reset
> +        *  the dq_count to -1 as we don't have enough packets to calculate the
> +        *  drain rate anymore The following if block is entered only when we
> +        *  have a substantial queue built up (QUEUE_THRESHOLD bytes or more)
> +        *  and we calculate the drain rate for the threshold here.  dq_count is
> +        *  in bytes, time difference in pie_time, hence rate is in
> +        *  bytes/pie_time.
> +        */
> +
> +       if (q->vars.dq_count != DQCOUNT_INVALID) {

I prefer not having blank line after each block comment when it refers
directly to the code below (save screen space when reading).

> +
> +               q->vars.dq_count += skb->len;
> +
> +               if (q->vars.dq_count >= QUEUE_THRESHOLD) {
> +                       pie_time_t now = pie_get_time();
> +                       pie_tdiff_t dtime = now - q->vars.dq_tstamp;
> +                       u64 count = q->vars.dq_count << 3;  /* scale by 8*/

This shift doesn't do what you expect. Since dq_count is 32 bits wide,
the compiler does a 32-bit shift and only then widens the result for count.
So either make count 32 bits and skip the expensive 64-bit divide,
or cast q->vars.dq_count to 64 bits before shifting.

> +
> +                       if (dtime == 0)
> +                               return 0;
> +
> +                       /* dtime has overflowed */
> +                       if (dtime < 0)
> +                               dtime = -dtime;
> +
> +                       do_div(count, dtime);
> +
> +                       if (q->vars.avg_dq_rate == 0)
> +                               q->vars.avg_dq_rate = count;
> +                       else
> +                               q->vars.avg_dq_rate =
> +                                   (q->vars.avg_dq_rate -
> +                                    (q->vars.avg_dq_rate >> 3)) + (count >> 3);
> +
> +                       /* If the queue has receded below the threshold, we hold
> +                        * on to the last drain rate calculated, else we reset
> +                        * dq_count to 0 to re-enter the if block when the next
> +                        * packet is dequeued
> +                        */
> +
> +                       if (qlen < QUEUE_THRESHOLD)
> +                               q->vars.dq_count = DQCOUNT_INVALID;
> +                       else {
> +                               q->vars.dq_count = 0;
> +                               q->vars.dq_tstamp = pie_get_time();
> +                       }
> +
> +                       if (q->vars.burst_time > 0) {
> +                               if (q->vars.burst_time > dtime)
> +                                       q->vars.burst_time -= dtime;
> +                               else
> +                                       q->vars.burst_time = 0;
> +                       }
> +               }
> +
> +       }
> +       return 0;
> +}
> +
> +static void calculate_probability(struct Qdisc *sch)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       int qlen = sch->qstats.backlog; /* queue size in bytes */
> +       pie_time_t qdelay = 0;  /* in pietime */
> +       pie_time_t qdelay_old = q->vars.qdelay; /* in pietime */
> +       s32 delta = 0;          /* signed difference */
> +       u32 oldprob;
> +       u32 alpha, beta;
> +       bool update_prob = true;        /* Should probability be updated? */
> +

Big problem, this is called without locks??
timer -> pie_timer -> calculate_probability

When you add locks you also have to worry about deadlock
on shutdown.

> +       q->vars.qdelay_old = q->vars.qdelay;
> +
> +       if (q->vars.avg_dq_rate > 0)
> +               qdelay = (qlen << 3) / q->vars.avg_dq_rate;
> +       else
> +               qdelay = 0;
> +
> +       /* If qdelay is zero and qlen is not, it means qlen is very small, less
> +        * than dequeue_rate, so we do not update probabilty in this round
> +        */
> +       if (qdelay == 0 && qlen != 0)
> +               update_prob = false;
> +
> +       /* Add ranges for alpha and beta, more aggressive for high dropping
> +        * mode and gentle steps for light dropping mode
> +        * In light dropping mode, take gentle steps; in medium dropping mode,
> +        * take medium steps; in high dropping mode, take big steps.
> +        */
> +       if (q->vars.prob < MAX_INT_VALUE / 100) {
> +               alpha =
> +                   (q->params.alpha * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 7;
> +               beta =
> +                   (q->params.beta * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 7;
> +       } else if (q->vars.prob < MAX_INT_VALUE / 10) {
> +               alpha =
> +                   (q->params.alpha * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 5;
> +               beta =
> +                   (q->params.beta * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 5;
> +       } else {
> +               alpha =
> +                   (q->params.alpha * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 4;
> +               beta =
> +                   (q->params.beta * (MAX_INT_VALUE / PIE_TIME_PER_SEC)) >> 4;
> +       }
> +
> +       /* alpha and beta should be between 0 and 32, in multiples of 1/16
> +        */
> +       delta += alpha * ((qdelay - q->params.target));
> +       delta += beta * ((qdelay - qdelay_old));
> +
> +       oldprob = q->vars.prob;
> +
> +       /* addition to ensure we increase probability in steps of no
> +        *  more than 2%
> +        */
> +
> +       if (delta > (s32) (MAX_INT_VALUE * 2 / 100)
> +           && q->vars.prob >= MAX_INT_VALUE / 10) {
> +               delta = MAX_INT_VALUE * 2 / 100;
> +       }
> +
> +       /*  Non-linear drop
> +        *  Tune drop probability to increase quickly for high delays
> +        *  (250ms and above)
> +        *  250ms is derived through experiments and provides error protection
> +        */
> +
> +       if (qdelay > (MS2PIETIME(250)))
> +               delta += (2 * MAX_INT_VALUE) / 100;
> +
> +       q->vars.prob += delta;
> +
> +       if (delta > 0) {
> +               /* prevent overflow */
> +               if (q->vars.prob < oldprob) {
> +                       q->vars.prob = MAX_INT_VALUE;
> +                       /* Prevent normalization error
> +                        * If probability is the maximum value already,
> +                        * we normalize it here, and skip the
> +                        * check to do a non-linear drop in the next section
> +                        */
> +                       update_prob = false;
> +               }
> +       } else {
> +               /* prevent underflow */
> +               if (q->vars.prob > oldprob)
> +                       q->vars.prob = 0;
> +       }
> +
> +       /* Non-linear drop in probability */
> +       /* Reduce drop probability quickly if delay is 0 for 2 consecutive
> +        * Tupdate periods
> +        */
> +       if ((qdelay == 0) && (qdelay_old == 0) && update_prob)
> +               q->vars.prob = (q->vars.prob * 98) / 100;
> +
> +       q->vars.qdelay = qdelay;
> +       q->vars.qlen_old = qlen;
> +
> +       /* we restart the measurement cycle if the following conditions are met
> +        *  1. If the delay has been low for 2 consecutive Tupdate periods
> +        *  2. Calculated drop probability is zero
> +        *  3. We have atleast one estimate for the avg_dq_rate ie.,
> +        *     is a non-zero value
> +        */
> +       if ((q->vars.qdelay < q->params.target / 2)
> +           && (q->vars.qdelay_old < q->params.target / 2)
> +           && (q->vars.prob == 0)
> +           && q->vars.avg_dq_rate > 0)
> +               pie_vars_init(&q->vars);
> +
> +       return;
> +}

Don't do empty return at end of function. It is unnecessary.


> +
> +static inline void pie_timer(unsigned long arg)

Since this is a callback it can't be inlined anyway

> +{
> +       struct Qdisc *sch = (struct Qdisc *)arg;
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       u64 tup;
Does this really have to be 64 bit?

> +
> +       calculate_probability(sch);
> +       /* reset the timer to fire after 'tupdate' us,
> +        * tupdate is currently in pie_time
> +        * mod_timer expects time to be in jiffies
> +        */
> +       /* convert from pietime to nsecs to ms*/
> +       tup = pie_time_to_ms(q->params.tupdate);
> +       tup = (tup * HZ) / (1000);      /* and then to  jiffies */

Useless paren around (1000)

> +
> +       mod_timer(&q->adapt_timer, jiffies + tup);
> +
> +       return;
> +
> +}
> +
> +static int pie_init(struct Qdisc *sch, struct nlattr *opt)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +
> +       pie_params_init(&q->params);
> +       pie_vars_init(&q->vars);
> +       pie_stats_init(&q->stats);
> +       sch->limit = q->params.limit;
> +       setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch);
> +       add_timer(&q->adapt_timer);

This is wrong: you haven't set the timer's expiration (it will be 0), so
the timer will fire.

> +
> +       if (opt) {
> +               int err = pie_change(sch, opt);
> +
> +               if (err)
> +                       return err;
> +       }
> +
> +       return 0;
> +}
> +
> +static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       struct nlattr *opts;
> +
> +       opts = nla_nest_start(skb, TCA_OPTIONS);
> +       if (opts == NULL)
> +               goto nla_put_failure;
> +
> +       /* convert target and tupdate from pietime to us */
> +       if (nla_put_u32(skb, TCA_PIE_TARGET,
> +                       pie_time_to_ms(q->params.target) * 1000L) ||
> +           nla_put_u32(skb, TCA_PIE_LIMIT,
> +                       sch->limit) ||
> +           nla_put_u32(skb, TCA_PIE_TUPDATE,
> +                       pie_time_to_ms(q->params.tupdate) * 1000L) ||
> +           nla_put_u32(skb, TCA_PIE_ALPHA,
> +                       q->params.alpha) ||
> +           nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
> +           nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
> +           nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
> +               goto nla_put_failure;
> +
> +       return nla_nest_end(skb, opts);
> +
> +nla_put_failure:
> +       nla_nest_cancel(skb, opts);
> +       return -1;
> +
> +}
> +
> +static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       struct tc_pie_xstats st = {
> +               .prob = q->vars.prob,
> +               .delay = pie_time_to_ms(q->vars.qdelay) * 1000, /* in us*/
> +               /* unscale and return dq_rate in bytes per sec*/
> +               .avg_dq_rate = q->vars.avg_dq_rate * (PIE_TIME_PER_SEC/8),
> +               .packets_in = q->stats.packets_in,
> +               .overlimit = q->stats.overlimit,
> +               .maxq = q->stats.maxq,
> +               .dropped = q->stats.dropped,
> +               .ecn_mark = q->stats.ecn_mark,
> +       };
Personal preference: I prefer an aligned initialization style.

> +
> +       return gnet_stats_copy_app(d, &st, sizeof(st));
> +}
> +
> +static inline struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch)
> +{
> +       struct sk_buff *skb;
> +       skb = __qdisc_dequeue_head(sch, &sch->q);
> +
> +       if (!skb)
> +               return NULL;
> +
> +       pie_process_dequeue(sch, skb);
> +
> +       return skb;
> +}
> +
> +static void pie_reset(struct Qdisc *sch)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +       qdisc_reset_queue(sch);
> +       pie_vars_init(&q->vars);
> +
> +       return;
> +}
> +
> +static void pie_destroy(struct Qdisc *sch)
> +{
> +       struct pie_sched_data *q = qdisc_priv(sch);
> +
> +       del_timer_sync(&q->adapt_timer);
> +}
> +
> +static struct Qdisc_ops pie_qdisc_ops __read_mostly = {
> +       .id = "pie",
> +       .priv_size = sizeof(struct pie_sched_data),
> +
> +       .enqueue = pie_qdisc_enqueue,
> +       .dequeue = pie_qdisc_dequeue,
> +       .peek = qdisc_peek_dequeued,
> +       .init = pie_init,
> +       .destroy = pie_destroy,
> +       .reset = pie_reset,
> +       .change = pie_change,
> +       .dump = pie_dump,
> +       .dump_stats = pie_dump_stats,
> +       .owner = THIS_MODULE,
> +};
> +
> +static int __init pie_module_init(void)
> +{
> +       return register_qdisc(&pie_qdisc_ops);
> +}
> +
> +static void __exit pie_module_exit(void)
> +{
> +       unregister_qdisc(&pie_qdisc_ops);
> +}
> +
> +module_init(pie_module_init);
> +module_exit(pie_module_exit);
> +
> +MODULE_DESCRIPTION
> +       ("PIE (Proportional Intergal controller Enhanced) scheduler");
Keep this on one line, ignore any complaints from checkpatch about it.

> +MODULE_AUTHOR("Vijay Subramanian");
> +MODULE_AUTHOR("Mythili Prabhu");
> +MODULE_LICENSE("GPL");

Please fix and resubmit. This is just a first pass review, there are
probably more detailed issues that others will see.

^ permalink raw reply

* You've won a Prize
From: Microsoft Iberica SL @ 2013-09-28 12:56 UTC (permalink / raw)


You've won a Prize
MICROSOFT IBERICA SL"
YOU 'VE WON.
ATTN:MICROSOFT IBERICA SL
Your email has won (EUR244,000,00)
(TWO HUNDRED AND FOURTY FOUR THOUSAND EURO)
Batch number:XL73276498AM
Ref number:QR352899526KC
This is a millennium scientific computer game in which
email addresses were used.It is a promotional program aimed at
encouraging internet users,therefore you do not need to buy ticket to enter
for it.
For further development,clarification and procedure please
Contact:Dr Eduardo Sanchez,
Email contact:payingroll446@yahoo.com.hk

^ permalink raw reply

* Re: [PATCH RESEND] iproute2: GRE over IPv6 tunnel support.
From: Stephen Hemminger @ 2013-09-28 17:14 UTC (permalink / raw)
  To: Dmitry Kozlov; +Cc: Hannes Frederic Sowa, Templin, Fred L, netdev
In-Reply-To: <20130928113251.75738a49@comp1>

On Sat, 28 Sep 2013 11:32:51 +0400
Dmitry Kozlov <xeb@mail.ru> wrote:

> GRE over IPv6 tunnel support.
> 
> Signed-off-by: Dmitry Kozlov <xeb@mail.ru>
> ---
>  ip/Makefile    |   3 +-
>  ip/ip6tunnel.c | 131 ++++++++++++++++---
>  ip/iplink.c    |   7 +-
>  ip/link_gre6.c | 398 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  4 files changed, 516 insertions(+), 23 deletions(-)
>  create mode 100644 ip/link_gre6.c
> 

Please send an additional patch to update the man page.

^ permalink raw reply

* Re: [PATCH v1 net-next] net: pkt_sched: PIE AQM scheme
From: Eric Dumazet @ 2013-09-28 17:19 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev, davem, shemminger, Mythili Prabhu, Dave Taht
In-Reply-To: <1380333383-9507-1-git-send-email-subramanian.vijay@gmail.com>


> +MODULE_DESCRIPTION
> +	("PIE (Proportional Intergal controller Enhanced) scheduler");

Intergal -> Integral

^ permalink raw reply

* See the attached file
From: Microsoft Promotion @ 2013-09-28 17:45 UTC (permalink / raw)

In-Reply-To: <1380390305.94955.YahooMailNeo@web5705.biz.mail.ne1.yahoo.com>

[-- Attachment #1: Type: text/plain, Size: 21 bytes --]

See the attached file

[-- Attachment #2: MICROSOFT_AWARD_PROMOTION_2013.doc --]
[-- Type: application/msword, Size: 124416 bytes --]

^ permalink raw reply

* Re: [PATCH 11/12] netfilter: Remove extern from function prototypes
From: Jan Engelhardt @ 2013-09-28 19:17 UTC (permalink / raw)
  To: Joe Perches
  Cc: netdev, David S. Miller, linux-kernel, Pablo Neira Ayuso,
	Patrick McHardy, Jozsef Kadlecsik, netfilter-devel, netfilter,
	coreteam
In-Reply-To: <de1106130366672acb936422f4da7cbb1aafbda1.1379961014.git.joe@perches.com>

On Monday 2013-09-23 20:37, Joe Perches wrote:

>There are a mix of function prototypes with and without extern
>in the kernel sources.  Standardize on not using extern for
>function prototypes.
>
>Function prototypes don't need to be written with extern.
>extern is assumed by the compiler.  Its use is as unnecessary as
>using auto to declare automatic/local variables in a block.

Or you could just extern all functions for consistency with variables.

^ permalink raw reply

* [PATCH net-next] bonding: RCUify bond_set_rx_mode()
From: Veaceslav Falico @ 2013-09-28 19:18 UTC (permalink / raw)
  To: netdev; +Cc: joe.lawrence, Veaceslav Falico, Jay Vosburgh, Andy Gospodarek

Currently we rely on rtnl locking in bond_set_rx_mode(), however it's not
always the case:

RTNL: assertion failed at drivers/net/bonding/bond_main.c (3391)
...
 [<ffffffff81651ca5>] dump_stack+0x54/0x74
 [<ffffffffa029e717>] bond_set_rx_mode+0xc7/0xd0 [bonding]
 [<ffffffff81553af7>] __dev_set_rx_mode+0x57/0xa0
 [<ffffffff81557ff8>] __dev_mc_add+0x58/0x70
 [<ffffffff81558020>] dev_mc_add+0x10/0x20
 [<ffffffff8161e26e>] igmp6_group_added+0x18e/0x1d0
 [<ffffffff81186f76>] ? kmem_cache_alloc_trace+0x236/0x260
 [<ffffffff8161f80f>] ipv6_dev_mc_inc+0x29f/0x320
 [<ffffffff8161f9e7>] ipv6_sock_mc_join+0x157/0x260
...

Fix this by using RCU primitives.

Reported-by: Joe Lawrence <joe.lawrence@stratus.com>
Tested-by: Joe Lawrence <joe.lawrence@stratus.com>
CC: Jay Vosburgh <fubar@us.ibm.com>
CC: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d5c3153..996d196 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3393,20 +3393,21 @@ static void bond_set_rx_mode(struct net_device *bond_dev)
 	struct list_head *iter;
 	struct slave *slave;
 
-	ASSERT_RTNL();
 
+	rcu_read_lock();
 	if (USES_PRIMARY(bond->params.mode)) {
-		slave = rtnl_dereference(bond->curr_active_slave);
+		slave = rcu_dereference(bond->curr_active_slave);
 		if (slave) {
 			dev_uc_sync(slave->dev, bond_dev);
 			dev_mc_sync(slave->dev, bond_dev);
 		}
 	} else {
-		bond_for_each_slave(bond, slave, iter) {
+		bond_for_each_slave_rcu(bond, slave, iter) {
 			dev_uc_sync_multiple(slave->dev, bond_dev);
 			dev_mc_sync_multiple(slave->dev, bond_dev);
 		}
 	}
+	rcu_read_unlock();
 }
 
 static int bond_neigh_init(struct neighbour *n)
-- 
1.8.4

^ permalink raw reply related

* Re: [PATCH net-next] xen-netfront: convert to GRO API and advertise this feature
From: David Miller @ 2013-09-28 19:38 UTC (permalink / raw)
  To: wei.liu2; +Cc: netdev, xen-devel, abchak, ian.campbell
In-Reply-To: <1379779543-27122-1-git-send-email-wei.liu2@citrix.com>

From: Wei Liu <wei.liu2@citrix.com>
Date: Sat, 21 Sep 2013 17:05:43 +0100

> @@ -1371,7 +1373,8 @@ static struct net_device *xennet_create_dev(struct xenbus_device *dev)
>  	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
>  	netdev->features        = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
>  				  NETIF_F_GSO_ROBUST;
> -	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO;
> +	netdev->hw_features	= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO |
> +				  NETIF_F_GRO;

Please post a new version of this patch with the feedback you've been
given integrated, in particular with this part removed because it is
not necessary.

Ian, please review the patch when Wei posts it.

Thanks.

^ permalink raw reply

* Re: [PATCH 2/2] drivers: net: vmxnet3 : vmxnet3_drv.c: removed checkpatch warning related to msleep()
From: David Miller @ 2013-09-28 19:38 UTC (permalink / raw)
  To: avi.kp.137; +Cc: sbhatewara, pv-drivers, netdev, linux-kernel
In-Reply-To: <1379866187-3158-1-git-send-email-avi.kp.137@gmail.com>


I see only patch #2 and #3.

Sort out why only 2 of the 3 patches were posted, and resend them
all.

Thank you.

^ permalink raw reply

* Re: [PATCH v1] USBNET: fix handling padding packet
From: David Miller @ 2013-09-28 19:45 UTC (permalink / raw)
  To: ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw
  Cc: gregkh-hQyY1W1yCW8ekmWlsbkhG0B+6BGkLq7r, oneukum-l3A5Bk7waGM,
	netdev-u79uwXL29TY76Z2rM5mHXA, linux-usb-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1379941175-10500-1-git-send-email-ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

From: Ming Lei <ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>
Date: Mon, 23 Sep 2013 20:59:35 +0800

> Commit 638c5115a7949(USBNET: support DMA SG) introduces DMA SG
> if the usb host controller is capable of building packet from
> discontinuous buffers, but missed handling padding packet when
> building DMA SG.
> 
> This patch attaches the pre-allocated padding packet at the
> end of the sg list, so padding packet can be sent to device
> if drivers require that.
> 
> Reported-by: David Laight <David.Laight-JxhZ9S5GRejQT0dZR+AlfA@public.gmane.org>
> Acked-by: Oliver Neukum <oliver-GvhC2dPhHPQdnm+yROfE0A@public.gmane.org>
> Signed-off-by: Ming Lei <ming.lei-Z7WLFzj8eWMS+FvcfC7Uqw@public.gmane.org>

Applied, thanks.

I still think the suggestion to disable scatter gather for
devices with the padding issue was the most sane approach
to solve this.

I guess people like supporting complicated crap and excess
code for things that pretty much do not exist, or at best
are not prominent enough to cater for at all.

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next] net ipv4: Convert ipv4.ip_local_port_range to be per netns
From: David Miller @ 2013-09-28 19:52 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev
In-Reply-To: <87fvswt5m5.fsf@tw-ebiederman.twitter.com>

From: ebiederm@xmission.com (Eric W. Biederman)
Date: Sun, 22 Sep 2013 23:27:30 -0700

> 
> - Move sysctl_local_ports from a global variable into struct netns_ipv4.
> - Modify inet_get_local_port_range to take a struct net.
> - Manually expand inet_get_local_range into ipv4_local_port_range
>   because I do not know the struct net.
> - Move the initialization of sysctl_local_ports into
>   sysctl_net_ipv4.c:ipv4_sysctl_init_net from inet_connection_sock.c
> 
> Originally-by: Samya <samya@twitter.com>
> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next] ipv6: Not need to set fl6.flowi6_flags as zero
From: David Miller @ 2013-09-28 19:52 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev
In-Reply-To: <1379919359-3032-1-git-send-email-roy.qing.li@gmail.com>

From: roy.qing.li@gmail.com
Date: Mon, 23 Sep 2013 14:55:59 +0800

> From: Li RongQing <roy.qing.li@gmail.com>
> 
> Setting fl6.flowi6_flags to zero after memset is redundant; remove it.
> 
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>

Applied.

^ permalink raw reply

* Re: [PATCH v5] IPv6 NAT: Do not drop DNATed 6to4/6rd packets
From: David Miller @ 2013-09-28 19:57 UTC (permalink / raw)
  To: hannes; +Cc: catab, netdev, yoshfuji, joe
In-Reply-To: <20130924213606.GB4446@order.stressinduktion.org>

From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Tue, 24 Sep 2013 23:36:06 +0200

> On Mon, Sep 23, 2013 at 11:04:19PM +0300, Catalin(ux) M. BOIE wrote:
>> When a router is doing  DNAT for 6to4/6rd packets the latest anti-spoofing
>> patch (218774dc) will drop them because the IPv6 address embedded
>> does not match the IPv4 destination. This patch will allow them to
>> pass by testing if we have an address that matches on 6to4/6rd interface.
>> I have been hit by this problem using Fedora and IPV6TO4_IPV4ADDR.
>> Also, log the dropped packets (with rate limit).
>> 
>> Signed-off-by: Catalin(ux) M. BOIE <catab@embedromix.ro>
> 
> Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

Applied, but Catalin please strictly refer to changes in the following
precise format:

	commit $SHA1_ID ("Commit message header line text")

Because SHA1_IDs are ambiguous, especially when the change in question
is backported into various -stable branches.

The only way to resolve the ambiguity is to provide the commit message
text (in parentheses and double quotes).

^ permalink raw reply

* Re: [PATCH net-next] net ipv4: Convert ipv4.ip_local_port_range to be per netns
From: David Miller @ 2013-09-28 20:07 UTC (permalink / raw)
  To: ebiederm; +Cc: netdev
In-Reply-To: <20130928.155228.211244914284752204.davem@davemloft.net>

From: David Miller <davem@davemloft.net>
Date: Sat, 28 Sep 2013 15:52:28 -0400 (EDT)

> From: ebiederm@xmission.com (Eric W. Biederman)
> Date: Sun, 22 Sep 2013 23:27:30 -0700
> 
>> 
>> - Move sysctl_local_ports from a global variable into struct netns_ipv4.
>> - Modify inet_get_local_port_range to take a struct net.
>> - Manually expand inet_get_local_range into ipv4_local_port_range
>>   because I do not know the struct net.
>> - Move the initialization of sysctl_local_ports into
>>   sysctl_net_ipv4.c:ipv4_sysctl_init_net from inet_connection_sock.c
>> 
>> Originally-by: Samya <samya@twitter.com>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
> 
> Applied.

I had to revert, you didn't thoroughly build test this.

security/selinux/hooks.c: In function ‘selinux_socket_bind’:
security/selinux/hooks.c:3933:4: error: incompatible type for argument 1 of ‘inet_get_local_port_range’
In file included from security/selinux/hooks.c:53:0:
include/net/ip.h:206:6: note: expected ‘struct net *’ but argument is of type ‘struct lsm_network_audit’

And when you repost make sure to deal with the space vs. TAB
issues pointed out to you.

Thanks.

^ permalink raw reply

* Re: [PATCH] ipv6: Fix preferred_lft not updating in some cases
From: Hannes Frederic Sowa @ 2013-09-28 20:28 UTC (permalink / raw)
  To: Paul Marks; +Cc: netdev, davem, yoshfuji, Lorenzo Colitti
In-Reply-To: <CAHaKRvJDZJjuv4sALmQAotk5EUMfYPiLN=8_noWCRQYOW+bxSA@mail.gmail.com>

On Fri, Sep 27, 2013 at 01:28:06PM -0700, Paul Marks wrote:
> On Fri, Sep 27, 2013 at 1:16 AM, Hannes Frederic Sowa
> <hannes@stressinduktion.org> wrote:
> > On Wed, Sep 25, 2013 at 03:12:55PM -0700, Paul Marks wrote:
> >> -                                     if (prefered_lft != ifp->prefered_lft) {
> >
> > Wouldn't the easiest solution be to just drop this if and execute the two
> > lines below unconditionally?
> 
> Yes, that's also correct.  But is it not better to have simpler code
> than shorter diffs?  Should we transliterate English to C, or think
> about what the algorithm is actually doing?  The fact that this bug
> has gone unnoticed provides some evidence that the code may have been
> too complicated.

I don't care about the length of diffs or shorter code. I would favour
a transliteration here because it makes verification easier (at least
for me). The algorithm is not that complex and I guess the bug has been
unnoticed because nobody ran into problems and cared until now.

So, why not get rid of update_lft then?

> >> +                             const u32 minimum_lft = min(
> >> +                                     stored_lft, (u32)MIN_VALID_LIFETIME);
> >> +                             valid_lft = max(valid_lft, minimum_lft);
> >
> > Quick question: Don't we need a prefered_lft = min(preferred_lft, valid_lft)
> > here?
> 
> The invariant is (preferred_lft <= valid_lft), and valid_lft can only
> get bigger, so I don't think there's a problem.

Ah, I got confused. Missed in the last case that it got tested earlier in the
function. Your code looks correct regarding every rule.

Acked-by: Hannes Frederic Sowa <hannes@stressinduktion.org>

Thanks,

  Hannes

^ permalink raw reply

* Re: [PATCH net-next] net ipv4: Convert ipv4.ip_local_port_range to be per netns
From: Eric W. Biederman @ 2013-09-28 20:32 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20130928.160753.1218915059639502436.davem@davemloft.net>

David Miller <davem@davemloft.net> writes:

> From: David Miller <davem@davemloft.net>
> Date: Sat, 28 Sep 2013 15:52:28 -0400 (EDT)
>
>> From: ebiederm@xmission.com (Eric W. Biederman)
>> Date: Sun, 22 Sep 2013 23:27:30 -0700
>> 
>>> 
>>> - Move sysctl_local_ports from a global variable into struct netns_ipv4.
>>> - Modify inet_get_local_port_range to take a struct net.
>>> - Manually expand inet_get_local_range into ipv4_local_port_range
>>>   because I do not know the struct net.
>>> - Move the initialization of sysctl_local_ports into
>>>   sysctl_net_ipv4.c:ipv4_sysctl_init_net from inet_connection_sock.c
>>> 
>>> Originally-by: Samya <samya@twitter.com>
>>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> 
>> Applied.
>
> I had to revert, you didn't thoroughly build test this.

My apologies, I was pushing a little too hard that day.  v3 will be
coming with the fix.

> security/selinux/hooks.c: In function ‘selinux_socket_bind’:
> security/selinux/hooks.c:3933:4: error: incompatible type for argument 1 of ‘inet_get_local_port_range’
> In file included from security/selinux/hooks.c:53:0:
> include/net/ip.h:206:6: note: expected ‘struct net *’ but argument is of type ‘struct lsm_network_audit’
>
> And when you repost make sure to deal with the space vs. TAB
> issues pointed out to you.

Definitely.

Eric

^ permalink raw reply

* Re: IPv6 path MTU discovery broken
From: Hannes Frederic Sowa @ 2013-09-28 20:33 UTC (permalink / raw)
  To: Steinar H. Gunderson; +Cc: netdev, edumazet
In-Reply-To: <20130927201420.GB12043@sesse.net>

Hello!

On Fri, Sep 27, 2013 at 10:14:20PM +0200, Steinar H. Gunderson wrote:
> So the “packet too big” packets really look like they're being ignored.
> However, they _do_ reach the kernel somehow, since Icmp6InPktTooBigs
> seems to increase.
> 
> Could this be related somehow to the packets coming from 2001:67c:29f4::31,
> while the default route is to a link-local address? (An RPF issue?) This used
> to work (although it was often flaky for me) in 3.10 and before. I can't
> easily bisect, though, as I don't boot this machine too often.

This looks like a bug and should definitely get fixed. There should be
no RPF issue. May I have a look at your /proc/net/ipv6_route?

Thanks,

  Hannes

^ permalink raw reply

* MUTUAL PROJECT
From: jing01lee @ 2013-09-28 20:07 UTC (permalink / raw)
  To: Recipients

Hello

I have a business proposal for you. There is no risks involved.
Pls reply for briefs. 
Mr Lee

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox