Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH iproute2-next] ip: Add support for nexthop objects
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/uapi/linux/nexthop.h   |  56 ++++
 include/uapi/linux/rtnetlink.h |   8 +
 ip/Makefile                    |   3 +-
 ip/ip.c                        |   3 +-
 ip/ip_common.h                 |   7 +-
 ip/ipmonitor.c                 |   6 +
 ip/ipnexthop.c                 | 652 +++++++++++++++++++++++++++++++++++++++++
 ip/iproute.c                   |  19 +-
 8 files changed, 747 insertions(+), 7 deletions(-)
 create mode 100644 include/uapi/linux/nexthop.h
 create mode 100644 ip/ipnexthop.c

diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
new file mode 100644
index 000000000000..335182e8229a
--- /dev/null
+++ b/include/uapi/linux/nexthop.h
@@ -0,0 +1,56 @@
+#ifndef __LINUX_NEXTHOP_H
+#define __LINUX_NEXTHOP_H
+
+#include <linux/types.h>
+
+struct nhmsg {
+	unsigned char	nh_family;
+	unsigned char	nh_scope;     /* one of RT_SCOPE */
+	unsigned char	nh_protocol;  /* Routing protocol that installed nh */
+	unsigned char	resvd;
+	unsigned int	nh_flags;     /* RTNH_F flags */
+};
+
+struct nexthop_grp {
+	__u32	id;
+	__u32	weight;
+};
+
+enum {
+	NEXTHOP_GRP_TYPE_MPATH,  /* default type if not specified */
+	__NEXTHOP_GRP_TYPE_MAX,
+};
+
+#define NEXTHOP_GRP_TYPE_MAX (__NEXTHOP_GRP_TYPE_MAX - 1)
+
+
+/* NHA_ID	32-bit id for nexthop. id must be greater than 0.
+ *		id == 0 means assign an unused id.
+ */
+enum {
+	NHA_UNSPEC,
+	NHA_ID,		/* u32 */
+	NHA_GROUP,	/* array of nexthop_grp */
+	NHA_GROUP_TYPE,	/* u16 one of NEXTHOP_GRP_TYPE;
+			 * default is NEXTHOP_GRP_TYPE_MPATH */
+
+	/* if NHA_GROUP attribute is added, no other attributes can be set */
+
+	NHA_BLACKHOLE,	/* flag; nexthop used to blackhole packets */
+	NHA_OIF,	/* u32 */
+	NHA_FLOW,	/* u32 */
+
+	NHA_TABLE_ID,	/* u32 - table id to validate gateway */
+	NHA_GATEWAY,	/* be32 (IPv4) or in6_addr (IPv6) gw address */
+
+	/* Dump control attributes */
+	NHA_GROUPS,	/* flag; only return nexthop groups in dump */
+	NHA_MASTER,	/* u32; only return nexthops with given master dev */
+
+	NHA_SADDR,	/* return only: IPv4 or IPv6 source address */
+
+	__NHA_MAX,
+};
+
+#define NHA_MAX	(__NHA_MAX - 1)
+#endif
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 8c1d600bfa33..158114245b6c 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -157,6 +157,13 @@ enum {
 	RTM_GETCHAIN,
 #define RTM_GETCHAIN RTM_GETCHAIN
 
+	RTM_NEWNEXTHOP = 104,
+#define RTM_NEWNEXTHOP RTM_NEWNEXTHOP
+	RTM_DELNEXTHOP,
+#define RTM_DELNEXTHOP RTM_DELNEXTHOP
+	RTM_GETNEXTHOP,
+#define RTM_GETNEXTHOP RTM_GETNEXTHOP
+
 	__RTM_MAX,
 #define RTM_MAX		(((__RTM_MAX + 3) & ~3) - 1)
 };
@@ -342,6 +349,7 @@ enum rtattr_type_t {
 	RTA_IP_PROTO,
 	RTA_SPORT,
 	RTA_DPORT,
+	RTA_NH_ID,
 	__RTA_MAX
 };
 
diff --git a/ip/Makefile b/ip/Makefile
index a88f93665ee6..7df818dbe23a 100644
--- a/ip/Makefile
+++ b/ip/Makefile
@@ -10,7 +10,8 @@ IPOBJ=ip.o ipaddress.o ipaddrlabel.o iproute.o iprule.o ipnetns.o \
     link_iptnl.o link_gre6.o iplink_bond.o iplink_bond_slave.o iplink_hsr.o \
     iplink_bridge.o iplink_bridge_slave.o ipfou.o iplink_ipvlan.o \
     iplink_geneve.o iplink_vrf.o iproute_lwtunnel.o ipmacsec.o ipila.o \
-    ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o
+    ipvrf.o iplink_xstats.o ipseg6.o iplink_netdevsim.o iplink_rmnet.o \
+    ipnexthop.o
 
 RTMONOBJ=rtmon.o
 
diff --git a/ip/ip.c b/ip/ip.c
index 58c643df8a36..963ef140c7c4 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -51,7 +51,7 @@ static void usage(void)
 "where  OBJECT := { link | address | addrlabel | route | rule | neigh | ntable |\n"
 "                   tunnel | tuntap | maddress | mroute | mrule | monitor | xfrm |\n"
 "                   netns | l2tp | fou | macsec | tcp_metrics | token | netconf | ila |\n"
-"                   vrf | sr }\n"
+"                   vrf | sr | nexthop }\n"
 "       OPTIONS := { -V[ersion] | -s[tatistics] | -d[etails] | -r[esolve] |\n"
 "                    -h[uman-readable] | -iec | -j[son] | -p[retty] |\n"
 "                    -f[amily] { inet | inet6 | ipx | dnet | mpls | bridge | link } |\n"
@@ -101,6 +101,7 @@ static const struct cmd {
 	{ "netconf",	do_ipnetconf },
 	{ "vrf",	do_ipvrf},
 	{ "sr",		do_seg6 },
+	{ "nexthop",	do_ipnh },
 	{ "help",	do_help },
 	{ 0 }
 };
diff --git a/ip/ip_common.h b/ip/ip_common.h
index 200be5e23dd1..2971c1586c4e 100644
--- a/ip/ip_common.h
+++ b/ip/ip_common.h
@@ -56,6 +56,8 @@ int print_rule(const struct sockaddr_nl *who,
 int print_netconf(const struct sockaddr_nl *who,
 		  struct rtnl_ctrl_data *ctrl,
 		  struct nlmsghdr *n, void *arg);
+int print_nexthop(const struct sockaddr_nl *who,
+		  struct nlmsghdr *n, void *arg);
 void netns_map_init(void);
 void netns_nsid_socket_init(void);
 int print_nsid(const struct sockaddr_nl *who,
@@ -90,6 +92,7 @@ int do_ipvrf(int argc, char **argv);
 void vrf_reset(void);
 int netns_identify_pid(const char *pidstr, char *name, int len);
 int do_seg6(int argc, char **argv);
+int do_ipnh(int argc, char **argv);
 
 int iplink_get(unsigned int flags, char *name, __u32 filt_mask);
 int iplink_ifla_xstats(int argc, char **argv);
@@ -165,5 +168,7 @@ int name_is_vrf(const char *name);
 #endif
 
 void print_num(FILE *fp, unsigned int width, uint64_t count);
-
+void print_rta_flow(FILE *fp, const struct rtattr *rta);
+void print_rt_flags(FILE *fp, unsigned int flags);
+void print_rta_if(FILE *fp, const struct rtattr *rta, const char *prefix);
 #endif /* _IP_COMMON_H_ */
diff --git a/ip/ipmonitor.c b/ip/ipmonitor.c
index a93b62cd6624..de129626683b 100644
--- a/ip/ipmonitor.c
+++ b/ip/ipmonitor.c
@@ -84,6 +84,12 @@ static int accept_msg(const struct sockaddr_nl *who,
 		}
 	}
 
+	case RTM_NEWNEXTHOP:
+	case RTM_DELNEXTHOP:
+		print_headers(fp, "[NEXTHOP]", ctrl);
+		print_nexthop(who, n, arg);
+		return 0;
+
 	case RTM_NEWLINK:
 	case RTM_DELLINK:
 		ll_remember_index(who, n, NULL);
diff --git a/ip/ipnexthop.c b/ip/ipnexthop.c
new file mode 100644
index 000000000000..9fa4b7292426
--- /dev/null
+++ b/ip/ipnexthop.c
@@ -0,0 +1,652 @@
+/*
+ * ip nexthop
+ *
+ * Copyright (C) 2017 Cumulus Networks
+ * Copyright (c) 2017 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <errno.h>
+#include <linux/nexthop.h>
+#include <libmnl/libmnl.h>
+#include <rt_names.h>
+
+#include "utils.h"
+#include "ip_common.h"
+
+static struct
+{
+	unsigned int flushed;
+	unsigned int groups;
+	char *flushb;
+	int flushp;
+	int flushe;
+} filter;
+
+enum {
+	IPNH_LIST,
+	IPNH_FLUSH,
+};
+
+#define RTM_NHA(h)  ((struct rtattr *)(((char *)(h)) + \
+			NLMSG_ALIGN(sizeof(struct nhmsg))))
+
+static void usage(void) __attribute__((noreturn));
+
+static void usage(void)
+{
+	fprintf(stderr, "Usage: ip nexthop { list | flush } SELECTOR\n");
+	fprintf(stderr, "       ip nexthop get [ id ID ]\n");
+	fprintf(stderr, "       ip nexthop { add | del | change | replace } NH\n");
+	fprintf(stderr, "SELECTOR := [ id ID ] [ dev DEV ] [ table TABLE ] [ vrf NAME ]\n");
+	fprintf(stderr, "NH := [ encap ENCAPTYPE ENCAPHDR ] [ via [ FAMILY ] ADDRESS ]\n");
+	fprintf(stderr, "      [ id ID ] [ dev STRING ] [ weight NUMBER ]\n");
+	fprintf(stderr, "      [ table TABLE ] [ vrf VRF ] NHFLAGS\n");
+	fprintf(stderr, "NHFLAGS := [ onlink | pervasive ]\n");
+	fprintf(stderr, "ENCAPTYPE := [ mpls | ip | ip6 ]\n");
+	fprintf(stderr, "ENCAPHDR := [ MPLSLABEL ]\n");
+	exit(-1);
+}
+
+static int delete_nexthop(__u32 id)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct nhmsg	nhm;
+		char		buf[64];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)),
+		.n.nlmsg_flags = NLM_F_REQUEST,
+		.n.nlmsg_type = RTM_DELNEXTHOP,
+		.nhm.nh_family = AF_UNSPEC,
+	};
+
+	req.n.nlmsg_seq = ++rth.seq;
+
+	addattr32(&req.n, sizeof(req), NHA_ID, id);
+
+	if (rtnl_talk(&rth, &req.n, NULL) < 0)
+		return -1;
+
+	filter.flushed++;
+	return 0;
+}
+
+struct nh_entry {
+	__u32 id;
+	unsigned int group;
+	struct nh_entry *next;
+};
+
+struct nh_entry *first, *last;
+
+static int flush_nexthop(const struct sockaddr_nl *who,
+			 struct nlmsghdr *nlh, void *arg)
+{
+	struct nhmsg *nhm = NLMSG_DATA(nlh);
+	struct rtattr *tb[NHA_MAX+1];
+	struct nh_entry *nh;
+	__u32 id = 0;
+	int len;
+
+	len = nlh->nlmsg_len - NLMSG_SPACE(sizeof(*nhm));
+	if (len < 0) {
+		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+		return -1;
+	}
+
+	parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
+	if (tb[NHA_ID])
+		id = rta_getattr_u32(tb[NHA_ID]);
+
+	if (!id)
+		return 0;
+
+	nh = malloc(sizeof(*nh));
+	if (!nh)
+		return -1;
+
+	nh->id = id;
+	nh->group = tb[NHA_GROUP] != NULL;
+	nh->next = NULL;
+	if (!first)
+		first = nh;
+	else
+		last->next = nh;
+
+	last = nh;
+	return 0;
+}
+
+static int ipnh_flush(void *req, __u32 len, unsigned int all)
+{
+	struct nh_entry *nh;
+
+	if (send(rth.fd, req, len, 0) < 0) {
+		perror("Cannot send dump request");
+		return -2;
+	}
+
+	if (rtnl_dump_filter(&rth, flush_nexthop, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		return -2;
+	}
+
+	/* if deleting all, then remove groups first */
+	if (all) {
+		nh = first;
+		while (nh) {
+			if (nh->group)
+				delete_nexthop(nh->id);
+			nh = nh->next;
+		}
+	}
+
+	nh = first;
+	while (nh) {
+		if (!all || !nh->group)
+			delete_nexthop(nh->id);
+		nh = nh->next;
+	}
+
+	if (!filter.flushed)
+		printf("Nothing to flush\n");
+	else
+		printf("Flushed %d nexthops\n", filter.flushed);
+
+	return 0;
+}
+
+static char *nh_group_type_to_str(__u16 group_type, char *buf, size_t len)
+{
+	static const char *typestr[NEXTHOP_GRP_TYPE_MAX + 1] = {
+		"multipath",   /* NEXTHOP_GRP_TYPE_MPATH */
+	};
+
+	if (group_type < ARRAY_SIZE(typestr))
+		snprintf(buf, len-1, "%s", typestr[group_type]);
+	else
+		snprintf(buf, len-1, "<%u>", group_type);
+
+	buf[len-1] = '\0';
+
+	return buf;
+}
+
+static void print_nh_group(FILE *fp, const struct rtattr *grps_attr,
+			   const struct rtattr *gtype)
+{
+	struct nexthop_grp *nhg = RTA_DATA(grps_attr);
+	int num = RTA_PAYLOAD(grps_attr) / sizeof(*nhg);
+	__u16 group_type = NEXTHOP_GRP_TYPE_MPATH;
+	int i;
+
+	SPRINT_BUF(b1);
+
+	if (!num || num * sizeof(*nhg) != RTA_PAYLOAD(grps_attr)) {
+		fprintf(fp, "<invalid nexthop group>");
+		return;
+	}
+
+	if (gtype)
+		group_type = rta_getattr_u16(gtype);
+
+	if (is_json_context()) {
+		open_json_array(PRINT_JSON, "group");
+		for (i = 0; i < num; ++i) {
+			open_json_object(NULL);
+			print_uint(PRINT_ANY, "id", "id %u ", nhg[i].id);
+			print_uint(PRINT_ANY, "weight", "weight %u ", nhg[i].weight);
+			close_json_object();
+		}
+		close_json_array(PRINT_JSON, NULL);
+		print_string(PRINT_ANY, "type", "type %s ",
+			     nh_group_type_to_str(group_type, b1, sizeof(b1)));
+	} else {
+		fprintf(fp, "group ");
+		for (i = 0; i < num; ++i) {
+			if (i)
+				fprintf(fp, "/");
+			fprintf(fp, "%u", nhg[i].id);
+			if (num > 1 && nhg[i].weight > 1)
+				fprintf(fp, ",%u", nhg[i].weight);
+		}
+	}
+}
+
+static void print_nh_gateway(FILE *fp, const struct nhmsg *nhm,
+			      const struct rtattr *rta)
+{
+	const char *gateway = format_host_rta(nhm->nh_family, rta);
+
+	if (is_json_context())
+		print_string(PRINT_JSON, "gateway", NULL, gateway);
+	else {
+		fprintf(fp, "via ");
+		print_color_string(PRINT_FP, ifa_family_color(nhm->nh_family),
+				  NULL, "%s ", gateway);
+	}
+}
+
+int print_nexthop(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
+{
+	struct nhmsg *nhm = NLMSG_DATA(n);
+	struct rtattr *tb[NHA_MAX+1];
+	FILE *fp = (FILE *)arg;
+	int len;
+
+	SPRINT_BUF(b1);
+
+	if (n->nlmsg_type != RTM_DELNEXTHOP &&
+	    n->nlmsg_type != RTM_NEWNEXTHOP) {
+		fprintf(stderr, "Not a nexthop: %08x %08x %08x\n",
+			n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
+		return -1;
+	}
+
+	len = n->nlmsg_len - NLMSG_SPACE(sizeof(*nhm));
+	if (len < 0) {
+		close_json_object();
+		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
+		return -1;
+	}
+
+	parse_rtattr(tb, NHA_MAX, RTM_NHA(nhm), len);
+
+	open_json_object(NULL);
+
+	if (n->nlmsg_type == RTM_DELROUTE)
+		print_bool(PRINT_ANY, "deleted", "Deleted ", true);
+
+	if (tb[NHA_ID])
+		print_uint(PRINT_ANY, "id", "id %u ",
+			   rta_getattr_u32(tb[NHA_ID]));
+
+	if (tb[NHA_GROUP])
+		print_nh_group(fp, tb[NHA_GROUP], tb[NHA_GROUP_TYPE]);
+
+	if (tb[NHA_GATEWAY])
+		print_nh_gateway(fp, nhm, tb[NHA_GATEWAY]);
+
+	if (tb[NHA_SADDR]) {
+		const char *psrc;
+
+		psrc = rt_addr_n2a_rta(nhm->nh_family, tb[NHA_SADDR]);
+		if (is_json_context())
+			print_string(PRINT_JSON, "src", NULL, psrc);
+		else {
+			fprintf(fp, "src ");
+			print_color_string(PRINT_FP,
+					   ifa_family_color(nhm->nh_family),
+					   NULL, "%s ", psrc);
+		}
+	}
+
+	if (tb[NHA_OIF])
+		print_rta_if(fp, tb[NHA_OIF], "dev");
+
+	if (tb[NHA_BLACKHOLE])
+		print_null(PRINT_ANY, "blackhole", "blackhole", NULL);
+
+	if (nhm->nh_protocol != RTPROT_UNSPEC || show_details > 0) {
+		print_string(PRINT_ANY, "protocol", "proto %s ",
+			     rtnl_rtprot_n2a(nhm->nh_protocol, b1, sizeof(b1)));
+	}
+
+	if (nhm->nh_scope != RT_SCOPE_UNIVERSE || show_details > 0) {
+		print_string(PRINT_ANY, "scope", "scope %s ",
+			     rtnl_rtscope_n2a(nhm->nh_scope, b1, sizeof(b1)));
+	}
+
+	if (tb[NHA_OIF])
+		print_rt_flags(fp, nhm->nh_flags);
+
+	if (tb[NHA_FLOW])
+		print_rta_flow(fp, tb[NHA_FLOW]);
+
+	print_string(PRINT_FP, NULL, "%s", "\n");
+	close_json_object();
+	fflush(fp);
+
+	return 0;
+}
+
+static int add_nh_group_attr(struct nlmsghdr *n, int maxlen, char *argv)
+{
+	struct nexthop_grp *grps;
+	int count = 0, i;
+	char *sep, *wsep;
+
+	if (*argv != '\0')
+		count = 1;
+
+	/* separator is '/' */
+	sep = strchr(argv, '/');
+	while (sep) {
+		count++;
+		sep = strchr(sep + 1, '/');
+	}
+
+	if (count == 0)
+		return -1;
+
+	grps = calloc(count, sizeof(*grps));
+	if (!grps)
+		return -1;
+
+	for (i = 0; i < count; ++i) {
+		sep = strchr(argv, '/');
+		if (sep)
+			*sep = '\0';
+
+		wsep = strchr(argv, ',');
+		if (wsep)
+			*wsep = '\0';
+
+		if (get_unsigned(&grps[i].id, argv, 0))
+			return -1;
+		if (wsep) {
+			wsep++;
+			if (get_unsigned(&grps[i].weight, wsep, 0))
+				return -1;
+		}
+
+		if (!sep)
+			break;
+
+		argv = sep + 1;
+	}
+
+	return addattr_l(n, maxlen, NHA_GROUP, grps, count * sizeof(*grps));
+}
+
+static int ipnh_modify(int cmd, unsigned int flags, int argc, char **argv)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct nhmsg	nhm;
+		char		buf[1024];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)),
+		.n.nlmsg_flags = NLM_F_REQUEST | flags,
+		.n.nlmsg_type = cmd,
+		.nhm.nh_family = AF_UNSPEC,
+	};
+	__u32 nh_flags = 0;
+	__u16 gtype = NEXTHOP_GRP_TYPE_MAX + 1;
+
+	while (argc > 0) {
+		if (!strcmp(*argv, "id")) {
+			__u32 id;
+
+			NEXT_ARG();
+			if (get_unsigned(&id, *argv, 0))
+				invarg("invalid id value", *argv);
+			addattr32(&req.n, sizeof(req), NHA_ID, id);
+		} else if (!strcmp(*argv, "dev")) {
+			int ifindex;
+
+			NEXT_ARG();
+			ifindex = ll_name_to_index(*argv);
+			if (!ifindex)
+				invarg("Device does not exist\n", *argv);
+			addattr32(&req.n, sizeof(req), NHA_OIF, ifindex);
+		} else if (strcmp(*argv, "via") == 0) {
+			inet_prefix addr;
+			int family;
+
+			NEXT_ARG();
+			family = read_family(*argv);
+			if (family == AF_UNSPEC)
+				family = req.nhm.nh_family;
+			else
+				NEXT_ARG();
+			get_addr(&addr, *argv, family);
+			if (req.nhm.nh_family == AF_UNSPEC)
+				req.nhm.nh_family = addr.family;
+			else if (req.nhm.nh_family != addr.family)
+				invarg("address family mismatch\n", *argv);
+			addattr_l(&req.n, sizeof(req), NHA_GATEWAY,
+				  &addr.data, addr.bytelen);
+		} else if (!strcmp(*argv, "blackhole")) {
+			addattr_l(&req.n, sizeof(req), NHA_BLACKHOLE, NULL, 0);
+		} else if (!strcmp(*argv, "onlink")) {
+			nh_flags |= RTNH_F_ONLINK;
+		} else if (!strcmp(*argv, "realms")) {
+			__u32 realm;
+
+			NEXT_ARG();
+			if (get_rt_realms_or_raw(&realm, *argv))
+				invarg("\"realm\" value is invalid\n", *argv);
+			addattr32(&req.n, sizeof(req), NHA_FLOW, realm);
+		} else if (!strcmp(*argv, "group")) {
+			NEXT_ARG();
+
+			if (add_nh_group_attr(&req.n, sizeof(req), *argv))
+				invarg("\"group\" value is invalid\n", *argv);
+		} else if (!strcmp(*argv, "multipath") ||
+			   !strcmp(*argv, "mpath")) {
+			gtype = NEXTHOP_GRP_TYPE_MPATH;
+		} else if (!strcmp(*argv, "table")) {
+			__u32 tb_id;
+
+			NEXT_ARG();
+			if (get_unsigned(&tb_id, *argv, 0))
+				invarg("invalid table value", *argv);
+			addattr32(&req.n, sizeof(req), NHA_TABLE_ID, tb_id);
+		} else if (strcmp(*argv, "vrf") == 0) {
+			__u32 tb_id;
+
+			NEXT_ARG();
+			tb_id = ipvrf_get_table(*argv);
+			if (tb_id == 0)
+				invarg("Invalid VRF", *argv);
+			addattr32(&req.n, sizeof(req), NHA_TABLE_ID, tb_id);
+		} else if (strcmp(*argv, "help") == 0) {
+			usage();
+		} else {
+			invarg("", *argv);
+		}
+		argc--; argv++;
+	}
+
+	if (gtype <= NEXTHOP_GRP_TYPE_MAX)
+		addattr16(&req.n, sizeof(req), NHA_GROUP_TYPE, gtype);
+
+	req.nhm.nh_flags = nh_flags;
+
+	if (rtnl_talk(&rth, &req.n, NULL) < 0)
+		return -2;
+
+	return 0;
+}
+
+static int ipnh_get_id(__u32 id)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct nhmsg	nhm;
+		char		buf[1024];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)),
+		.n.nlmsg_flags = NLM_F_REQUEST,
+		.n.nlmsg_type  = RTM_GETNEXTHOP,
+		.nhm.nh_family = preferred_family,
+	};
+	struct nlmsghdr *answer;
+
+	addattr32(&req.n, sizeof(req), NHA_ID, id);
+
+	if (rtnl_talk(&rth, &req.n, &answer) < 0)
+		return -2;
+
+	new_json_obj(json);
+
+	if (print_nexthop(NULL, answer, (void *)stdout) < 0) {
+		free(answer);
+		return -1;
+	}
+
+	delete_json_obj();
+	fflush(stdout);
+
+	free(answer);
+
+	return 0;
+}
+
+static int ipnh_list_flush(int argc, char **argv, int action)
+{
+	struct {
+		struct nlmsghdr	n;
+		struct nhmsg	nhm;
+		char		buf[256];
+	} req = {
+		.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct nhmsg)),
+		.n.nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST,
+		.n.nlmsg_type  = RTM_GETNEXTHOP,
+		.n.nlmsg_seq = ++rth.seq,
+		.nhm.nh_family = preferred_family,
+	};
+	unsigned int master = 0;
+	unsigned int all = (argc == 0);
+
+	rth.dump = req.n.nlmsg_seq;
+
+	while (argc > 0) {
+		if (!matches(*argv, "dev")) {
+			unsigned int ifindex;
+
+			NEXT_ARG();
+			ifindex = ll_name_to_index(*argv);
+			if (!ifindex)
+				invarg("Device does not exist\n", *argv);
+			addattr32(&req.n, sizeof(req), NHA_OIF, ifindex);
+		} else if (!matches(*argv, "groups")) {
+			addattr_l(&req.n, sizeof(req), NHA_GROUPS, NULL, 0);
+		} else if (!matches(*argv, "master")) {
+			NEXT_ARG();
+			master = ll_name_to_index(*argv);
+			if (!master)
+				invarg("Device does not exist\n", *argv);
+		} else if (matches(*argv, "vrf") == 0) {
+			NEXT_ARG();
+			master = ll_name_to_index(*argv);
+			if (!master)
+				invarg("VRF does not exist\n", *argv);
+			if (!name_is_vrf(*argv))
+				invarg("Invalid VRF\n", *argv);
+		} else if (!strcmp(*argv, "id")) {
+			__u32 id;
+
+			NEXT_ARG();
+			if (get_unsigned(&id, *argv, 0))
+				invarg("invalid id value", *argv);
+			return ipnh_get_id(id);
+		} else if (matches(*argv, "help") == 0) {
+			usage();
+		} else {
+			invarg("", *argv);
+		}
+		argc--; argv++;
+	}
+
+	if (master)
+		addattr32(&req.n, sizeof(req), NHA_MASTER, master);
+
+	if (action == IPNH_FLUSH)
+		return ipnh_flush(&req, req.n.nlmsg_len, all);
+
+	if (send(rth.fd, &req, req.n.nlmsg_len, 0) < 0) {
+		perror("Cannot send dump request");
+		return -2;
+	}
+
+	new_json_obj(json);
+
+	if (rtnl_dump_filter(&rth, print_nexthop, stdout) < 0) {
+		fprintf(stderr, "Dump terminated\n");
+		return -2;
+	}
+
+	delete_json_obj();
+	fflush(stdout);
+
+	return 0;
+}
+
+static int ipnh_get(int argc, char **argv)
+{
+	__u32 id = 0;
+
+	while (argc > 0) {
+		if (!strcmp(*argv, "id")) {
+			NEXT_ARG();
+			if (get_unsigned(&id, *argv, 0))
+				invarg("invalid id value", *argv);
+		}
+		if (matches(*argv, "help") == 0)
+			usage();
+
+		argc--; argv++;
+	}
+
+	if (!id) {
+		usage();
+		return -1;
+	}
+
+	return ipnh_get_id(id);
+}
+
+int do_ipnh(int argc, char **argv)
+{
+	if (argc < 1)
+		return ipnh_list_flush(0, NULL, IPNH_LIST);
+
+	if (!matches(*argv, "add"))
+		return ipnh_modify(RTM_NEWNEXTHOP, NLM_F_CREATE|NLM_F_EXCL,
+				   argc-1, argv+1);
+	if (!matches(*argv, "change") || !strcmp(*argv, "chg"))
+		return ipnh_modify(RTM_NEWNEXTHOP, NLM_F_REPLACE,
+				   argc-1, argv+1);
+	if (!matches(*argv, "replace"))
+		return ipnh_modify(RTM_NEWNEXTHOP, NLM_F_CREATE|NLM_F_REPLACE,
+				   argc-1, argv+1);
+	if (!matches(*argv, "delete"))
+		return ipnh_modify(RTM_DELNEXTHOP, 0, argc-1, argv+1);
+
+	if (!matches(*argv, "list") ||
+	    !matches(*argv, "show") ||
+	    !matches(*argv, "lst"))
+		return ipnh_list_flush(argc-1, argv+1, IPNH_LIST);
+
+	if (!matches(*argv, "get"))
+		return ipnh_get(argc-1, argv+1);
+
+	if (!matches(*argv, "flush"))
+		return ipnh_list_flush(argc-1, argv+1, IPNH_FLUSH);
+
+	if (!matches(*argv, "help"))
+		usage();
+
+	fprintf(stderr,
+		"Command \"%s\" is unknown, try \"ip nexthop help\".\n", *argv);
+	exit(-1);
+}
diff --git a/ip/iproute.c b/ip/iproute.c
index 30833414a3f7..0af72c2eccca 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -349,7 +349,7 @@ static void print_rtax_features(FILE *fp, unsigned int features)
 			    "features", "0x%x ", of);
 }
 
-static void print_rt_flags(FILE *fp, unsigned int flags)
+void print_rt_flags(FILE *fp, unsigned int flags)
 {
 	open_json_array(PRINT_JSON,
 			is_json_context() ?  "flags" : "");
@@ -394,8 +394,8 @@ static void print_rt_pref(FILE *fp, unsigned int pref)
 	}
 }
 
-static void print_rta_if(FILE *fp, const struct rtattr *rta,
-			const char *prefix)
+void print_rta_if(FILE *fp, const struct rtattr *rta,
+		  const char *prefix)
 {
 	const char *ifname = ll_index_to_name(rta_getattr_u32(rta));
 
@@ -492,7 +492,7 @@ static void print_rta_cacheinfo(FILE *fp, const struct rta_cacheinfo *ci)
 	}
 }
 
-static void print_rta_flow(FILE *fp, const struct rtattr *rta)
+void print_rta_flow(FILE *fp, const struct rtattr *rta)
 {
 	__u32 to = rta_getattr_u32(rta);
 	__u32 from = to >> 16;
@@ -823,6 +823,10 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
 			     rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
 	}
 
+	if (tb[RTA_NH_ID])
+		print_uint(PRINT_ANY, "nhid", "nhid %u ",
+			   rta_getattr_u32(tb[RTA_NH_ID]));
+
 	if (tb[RTA_GATEWAY] && filter.rvia.bitlen != host_len)
 		print_rta_gateway(fp, r, tb[RTA_GATEWAY]);
 
@@ -1351,6 +1355,13 @@ static int iproute_modify(int cmd, unsigned int flags, int argc, char **argv)
 		} else if (strcmp(*argv, "nexthop") == 0) {
 			nhs_ok = 1;
 			break;
+		} else if (!strcmp(*argv, "nhid")) {
+			__u32 id;
+
+			NEXT_ARG();
+			if (get_u32(&id, *argv, 0))
+				invarg("\"id\" value is invalid\n", *argv);
+			addattr32(&req.n, sizeof(req), RTA_NH_ID, id);
 		} else if (matches(*argv, "protocol") == 0) {
 			__u32 prot;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 13/18] net/ipv4: Convert existing use of fib_info to new helpers
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Remove direct accesses to fi->fib_nh in favor of the helpers added
in the previous patch.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_router.c    |  4 +++-
 drivers/net/ethernet/rocker/rocker_ofdpa.c           | 20 ++++++++++++++------
 include/net/ip_fib.h                                 |  1 -
 net/ipv4/fib_frontend.c                              |  3 ++-
 net/ipv4/fib_rules.c                                 |  3 ++-
 net/ipv4/fib_semantics.c                             | 12 ++++++++----
 net/ipv4/fib_trie.c                                  | 19 +++++++++++--------
 7 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 2ab9cf25a08a..3fcac0b6fa92 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -28,6 +28,7 @@
 #include <net/ipv6.h>
 #include <net/fib_notifier.h>
 #include <net/switchdev.h>
+#include <net/nexthop.h>
 
 #include "spectrum.h"
 #include "core.h"
@@ -4121,12 +4122,13 @@ mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp_fib_entry *fib_entry)
 {
 	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
-	struct net_device *dev = fen_info->fi->fib_dev;
 	struct mlxsw_sp_ipip_entry *ipip_entry;
 	struct fib_info *fi = fen_info->fi;
+	struct net_device *dev;
 
 	switch (fen_info->type) {
 	case RTN_LOCAL:
+		dev = fib_info_nh_dev(fi);
 		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
 						 MLXSW_SP_L3_PROTO_IPV4, dip);
 		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 6473cc68c2d5..c05d35945ea7 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -23,6 +23,7 @@
 #include <net/switchdev.h>
 #include <net/ip_fib.h>
 #include <net/arp.h>
+#include <net/nexthop.h>
 
 #include "rocker.h"
 #include "rocker_tlv.h"
@@ -2286,8 +2287,8 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port,  __be32 dst,
 
 	/* XXX support ECMP */
 
-	nh = fi->fib_nh;
-	nh_on_port = (fi->fib_dev == ofdpa_port->dev);
+	nh = fib_info_nh(fi, 0);
+	nh_on_port = (nh->nh_dev == ofdpa_port->dev);
 	has_gw = !!nh->nh_gw;
 
 	if (has_gw && nh_on_port) {
@@ -2747,11 +2748,13 @@ static int ofdpa_fib4_add(struct rocker *rocker,
 {
 	struct ofdpa *ofdpa = rocker->wpriv;
 	struct ofdpa_port *ofdpa_port;
+	struct net_device *dev;
 	int err;
 
 	if (ofdpa->fib_aborted)
 		return 0;
-	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	dev = fib_info_nh_dev(fen_info->fi);
+	ofdpa_port = ofdpa_port_dev_lower_find(dev, rocker);
 	if (!ofdpa_port)
 		return 0;
 	err = ofdpa_port_fib_ipv4(ofdpa_port, htonl(fen_info->dst),
@@ -2768,10 +2771,12 @@ static int ofdpa_fib4_del(struct rocker *rocker,
 {
 	struct ofdpa *ofdpa = rocker->wpriv;
 	struct ofdpa_port *ofdpa_port;
+	struct net_device *dev;
 
 	if (ofdpa->fib_aborted)
 		return 0;
-	ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker);
+	dev = fib_info_nh_dev(fen_info->fi);
+	ofdpa_port = ofdpa_port_dev_lower_find(dev, rocker);
 	if (!ofdpa_port)
 		return 0;
 	fen_info->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
@@ -2794,11 +2799,14 @@ static void ofdpa_fib4_abort(struct rocker *rocker)
 
 	spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
 	hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) {
+		struct net_device *dev;
+
 		if (flow_entry->key.tbl_id !=
 		    ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING)
 			continue;
-		ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev,
-						       rocker);
+
+		dev = fib_info_nh_dev(flow_entry->fi);
+		ofdpa_port = ofdpa_port_dev_lower_find(dev, rocker);
 		if (!ofdpa_port)
 			continue;
 		flow_entry->fi->fib_nh->nh_flags &= ~RTNH_F_OFFLOAD;
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index e39f55f3c3d8..c59e0f1ba59b 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -129,7 +129,6 @@ struct fib_info {
 	int			fib_nhs;
 	struct rcu_head		rcu;
 	struct fib_nh		fib_nh[0];
-#define fib_dev		fib_nh[0].nh_dev
 };
 
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index ec6ae186d4b0..c483453bf037 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -35,6 +35,7 @@
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/slab.h>
+#include <net/nexthop.h>
 
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -234,7 +235,7 @@ static inline unsigned int __inet_dev_addr_type(struct net *net,
 	if (table) {
 		ret = RTN_UNICAST;
 		if (!fib_table_lookup(table, &fl4, &res, FIB_LOOKUP_NOREF)) {
-			if (!dev || dev == res.fi->fib_dev)
+			if (!dev || dev == fib_info_nh_dev(res.fi))
 				ret = res.type;
 		}
 	}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index f8eb78d042a4..6808883af694 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -32,6 +32,7 @@
 #include <net/tcp.h>
 #include <net/ip_fib.h>
 #include <net/fib_rules.h>
+#include <net/nexthop.h>
 
 struct fib4_rule {
 	struct fib_rule		common;
@@ -146,7 +147,7 @@ static bool fib4_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
 	struct net_device *dev = NULL;
 
 	if (result->fi)
-		dev = result->fi->fib_dev;
+		dev = fib_info_nh_dev(result->fi);
 
 	/* do not accept result if the route does
 	 * not meet the required prefix length
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 53e38ecfdd58..0cd536ad1761 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -45,6 +45,7 @@
 #include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/fib_notifier.h>
+#include <net/nexthop.h>
 
 #include "fib_lookup.h"
 
@@ -447,10 +448,11 @@ static int fib_detect_death(struct fib_info *fi, int order,
 			    struct fib_info **last_resort, int *last_idx,
 			    int dflt)
 {
+	struct fib_nh *fnh = fib_info_nh(fi, 0);
 	struct neighbour *n;
 	int state = NUD_NONE;
 
-	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
+	n = neigh_lookup(&arp_tbl, &fnh->nh_gw, fnh->nh_dev);
 	if (n) {
 		state = n->nud_state;
 		neigh_release(n);
@@ -713,7 +715,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 	if (cfg->fc_oif || cfg->fc_gw) {
 		if (cfg->fc_encap) {
 			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
-					    fi->fib_nh, cfg, extack))
+					    fib_info_nh(fi, 0), cfg, extack))
 				return 1;
 		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
@@ -1571,6 +1573,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 
 	hlist_for_each_entry_rcu(fa, fa_head, fa_list) {
 		struct fib_info *next_fi = fa->fa_info;
+		struct fib_nh *fnh;
 
 		if (fa->fa_slen != slen)
 			continue;
@@ -1592,8 +1595,9 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 		if (next_fi->fib_scope != res->scope ||
 		    fa->fa_type != RTN_UNICAST)
 			continue;
-		if (!next_fi->fib_nh[0].nh_gw ||
-		    next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
+
+		fnh = fib_info_nh(next_fi, 0);
+		if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK)
 			continue;
 
 		fib_alias_accessed(fa);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 51e7b38f3a7b..c6aab049a4ac 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -83,6 +83,7 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/fib_notifier.h>
+#include <net/nexthop.h>
 #include <trace/events/fib.h>
 #include "fib_lookup.h"
 
@@ -2621,13 +2622,13 @@ static void fib_route_seq_stop(struct seq_file *seq, void *v)
 	rcu_read_unlock();
 }
 
-static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
+static unsigned int fib_flag_trans(int type, __be32 mask, struct fib_info *fi)
 {
 	unsigned int flags = 0;
 
 	if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
 		flags = RTF_REJECT;
-	if (fi && fi->fib_nh->nh_gw)
+	if (fi && fib_info_nh_gw(fi))
 		flags |= RTF_GATEWAY;
 	if (mask == htonl(0xFFFFFFFF))
 		flags |= RTF_HOST;
@@ -2659,7 +2660,7 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 	prefix = htonl(l->key);
 
 	hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) {
-		const struct fib_info *fi = fa->fa_info;
+		struct fib_info *fi = fa->fa_info;
 		__be32 mask = inet_make_mask(KEYLENGTH - fa->fa_slen);
 		unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
 
@@ -2672,26 +2673,28 @@ static int fib_route_seq_show(struct seq_file *seq, void *v)
 
 		seq_setwidth(seq, 127);
 
-		if (fi)
+		if (fi) {
+			struct net_device *dev = fib_info_nh_dev(fi);
+
 			seq_printf(seq,
 				   "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
 				   "%d\t%08X\t%d\t%u\t%u",
-				   fi->fib_dev ? fi->fib_dev->name : "*",
+				   dev ? dev->name : "*",
 				   prefix,
-				   fi->fib_nh->nh_gw, flags, 0, 0,
+				   fib_info_nh_gw(fi), flags, 0, 0,
 				   fi->fib_priority,
 				   mask,
 				   (fi->fib_advmss ?
 				    fi->fib_advmss + 40 : 0),
 				   fi->fib_window,
 				   fi->fib_rtt >> 3);
-		else
+		} else {
 			seq_printf(seq,
 				   "*\t%08X\t%08X\t%04X\t%d\t%u\t"
 				   "%d\t%08X\t%d\t%u\t%u",
 				   prefix, 0, flags, 0, 0, 0,
 				   mask, 0, 0, 0);
-
+		}
 		seq_pad(seq, '\n');
 	}
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 01/18] net: Rename net/nexthop.h net/rtnh.h
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

The header contains rtnh_ macros so rename the file accordingly.
Allows next patch to use the nexthop.h name.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/{nexthop.h => rtnh.h} | 4 ++--
 net/core/lwtunnel.c               | 2 +-
 net/decnet/dn_fib.c               | 2 +-
 net/ipv4/fib_semantics.c          | 2 +-
 net/ipv4/ipmr.c                   | 2 +-
 net/ipv6/route.c                  | 2 +-
 net/mpls/af_mpls.c                | 2 +-
 7 files changed, 8 insertions(+), 8 deletions(-)
 rename include/net/{nexthop.h => rtnh.h} (94%)

diff --git a/include/net/nexthop.h b/include/net/rtnh.h
similarity index 94%
rename from include/net/nexthop.h
rename to include/net/rtnh.h
index 902ff382a6dc..aa2cfc508f7c 100644
--- a/include/net/nexthop.h
+++ b/include/net/rtnh.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __NET_NEXTHOP_H
-#define __NET_NEXTHOP_H
+#ifndef __NET_RTNH_H
+#define __NET_RTNH_H
 
 #include <linux/rtnetlink.h>
 #include <net/netlink.h>
diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c
index 0b171756453c..80c30cd5744a 100644
--- a/net/core/lwtunnel.c
+++ b/net/core/lwtunnel.c
@@ -26,7 +26,7 @@
 #include <net/lwtunnel.h>
 #include <net/rtnetlink.h>
 #include <net/ip6_fib.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 
 #ifdef CONFIG_MODULES
 
diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c
index f78fe58eafc8..3757a56bbcbd 100644
--- a/net/decnet/dn_fib.c
+++ b/net/decnet/dn_fib.c
@@ -42,7 +42,7 @@
 #include <net/dn_fib.h>
 #include <net/dn_neigh.h>
 #include <net/dn_dev.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 
 #define RT_MIN_TABLE 1
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index f3c89ccf14c5..93524a746ca8 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -42,7 +42,7 @@
 #include <net/sock.h>
 #include <net/ip_fib.h>
 #include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/fib_notifier.h>
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5660adcf7a04..564d4fd5a92b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -66,7 +66,7 @@
 #include <net/netlink.h>
 #include <net/fib_rules.h>
 #include <linux/netconf.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include <net/switchdev.h>
 
 struct ipmr_rule {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c4ea13e8360b..07ed7812c6b4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -59,7 +59,7 @@
 #include <net/xfrm.h>
 #include <net/netevent.h>
 #include <net/netlink.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include <net/lwtunnel.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 7a4de6d618b1..d066e5e9b76c 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -23,7 +23,7 @@
 #include <net/ipv6.h>
 #endif
 #include <net/addrconf.h>
-#include <net/nexthop.h>
+#include <net/rtnh.h>
 #include "internal.h"
 
 /* max memory we will use for mpls_route */
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 00/18] net: Improve route scalability via support for nexthop objects
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern

From: David Ahern <dsahern@gmail.com>

As mentioned at netconf in Seoul, we would like to introduce nexthops as
independent objects from the routes to better align with both routing
daemons and hardware and to improve route insertion times into the kernel.

This series adds nexthop objects with their own lifecycle. The model
retains a lot of the established semantics from routes and re-uses some
of the data structures like fib_nh and fib6_nh to more easily align with
the existing code. One difference with nexthop objects is the behavior
better aligns with the target user - routing daemons and switch ASICs.
Specifically, with the exception of the blackhole nexthop, all nexthops
must reference a netdevice (or have a gateway that resolves to a device)
and the device must be admin up with carrier.

Prefixes are then installed pointing to the nexthop by id:
  { prefix } --> { nexthop }  --> { gateway, device }

The nexthop object contains the gateway and device reference.

Benchmarks
The following data shows the route insert time for 720,022 routes (a full
IPv4 internet feed from August 28th). "current" means the current code
where a route insert specifies the device and gateway inline with the
prefix; the "nexthop" columns mean use of the nexthop objects.

         1-hop          1-hop     |    2-hops       2-hops
        current        nexthop    |   current      nexthop
        --------------------------|-------------------------
real    0m21.872s      0m12.982s  |   0m28.723s    0m12.406s
user    0m2.929s       0m1.816s   |   0m3.966s     0m1.935s
sys     0m13.469s      0m6.010s   |   0m18.992s    0m5.913s

With nexthop objects the time to insert the routes is reduced by more
than 30% with the kernel time cut in half. The current model has a route
insertion rate of about 32,000 prefixes / second and with nexthop objects
that increases to a little over 55,000 prefixes/second.

For routes with multiple nexthops the install time is cut by more than
half with system time reduce by a factor of 3. Further, with nexthop
objects insert times for multipath routes drops down to the same as
single path routes since the multipath spec is given once (ie., with the
current model, the time to insert routes increases with the number of
paths in the route compared to nexthop objects where the number of paths
is handled once and the prefixes referencing it are installed in constant
time.

The difference between real and system times shows there is room for
improvement with the trie implementation. As an example, increasing the
sync_pages from 128 to 1024 delays the call to synchronize_rcu increasing
the insert rate to more than 78,000 prefixes/sec!

Some key features:
1. Allows atomic replace of any nexthop object - a nexthop or a group.
   This allows existing route entries to have their nexthop updated
   without the overhead of removing and re-inserting (or replacing)
   them. Instead, one update of the nexthop object implicitly updates
   all routes referencing it.

   One limitation with the atomic replace is that a nexthop group can
   only be replaced with a new group spec and similarly a nexthop can
   only be replaced by a nexthop spec. Specifically, a nexthop id can
   not move between a single nexthop and a group nexthop.

2. Blackhole nexthop: a nexthop object can be designated a blackhole
   which means any lookups that resolve to it, packets are dropped as
   if the lookup failed with the result RTN_BLACKHOLE. Blackhole nexthops
   can not be used with nexthop groups. Combined with atomic replace
   this allows routes to be installed pointing to a blackhole nexthop
   and then switched to an actual gateway with a single nexthop replace
   command (or vice versa, a gateway nexthop is flipped to a blackhole).

3. Nexthop groups for multipath routes. A nexthop group is a nexthop
   that references other nexthops. A multipath group can not be used
   as a nexthop in another nexthop group (ie., groups can not be nested).

4. Multipath routes for IPv6 with device only nexthops. There is a
   demonstrated need for this feature and the existing route semantics
   do not allow it. This series provides a means for that end - create a
   nexthop that has a device only specification.

5. Admin and carrier up are required. If the device goes down (admin or
   carrier) the nexthop is removed in which case routes referencing the
   nexthop are evicted and any nexthop groups referencing it are adjusted.

6. Follow on patches will allow IPv6 nexthops with IPv4 routes for users
   wanting support of RFC 5549.

7. Future extensions: active / backup nexthop. The nexthop groups are
   structured to allow a new group type to be added. One example is a
   group where a nexthop has a preferred device and gateway, but should
   the device go down or the gateway not resolve, the backup nexthop is
   used.

Additional Benefits
- smaller route notifications - messages contain a single nexthop id versus
  the detailed nexthop specification. This is especially noticeable as the
  number of paths increases. Smaller messages have a reduced load on
  userspace as well.

- smaller memory footprint for IPv6 routes.

Examples
1. Single path
    $ ip nexthop add id 1 via 10.99.1.2 dev veth1
    $ ip route add 10.1.1.0/24 nhid 1

    $ ip next ls
    id 1 via 10.99.1.2 src 10.99.1.1 dev veth1 scope link

    $ ip ro ls
    10.1.1.0/24 nhid 1 scope link
    ...

2. ECMP
    $ ip nexthop add id 2 via 10.99.3.2 dev veth3
    $ ip nexthop add id 1001 group 1/2
      --> creates a nexthop group with 2 component nexthops:
          id 1 and id 2 both the same weight

    $ ip route add 10.1.2.0/24 nhid 1001

    $ ip next ls
    id 1 via 10.99.1.2 src 10.99.1.1 dev veth1 scope link
    id 2 via 10.99.3.2 src 10.99.3.1 dev veth3 scope link
    id 1001 group 1/2

    $ ip ro ls
    10.1.1.0/24 nhid 1 scope link
    10.1.2.0/24 nhid 1001 scope link
    ...

3. Weighted multipath
    $ ip nexthop add id 1002 group 1,10/2,20
      --> creates a nexthop group with 2 component nexthops:
          id 1 with a weight of 10 and id 2 with a weight of 20

    $ ip route add 10.1.3.0/24 nhid 1002

    $  ip next ls
    id 1 via 10.99.1.2 src 10.99.1.1 dev veth1 scope link
    id 2 via 10.99.3.2 src 10.99.3.1 dev veth3 scope link
    id 1001 group 1/2
    id 1002 group 1,10/2,20

    $ ip ro ls
    10.1.1.0/24 nhid 1 scope link
    10.1.2.0/24 nhid 1001 scope link
    10.1.3.0/24 nhid 1002 scope link
    ...

Open Items
There is long to-do list before this is ready (e.g., IPv6 multipath, lwt
encap, and updating mlxsw). The point of this RFC is to get comments on
the API and overall idea. Specifically, any interested parties should
think about the API, the objects, the workflow, how it fits and
possibility for future extensions.

David Ahern (18):
  net: Rename net/nexthop.h net/rtnh.h
  net: ipv4: export fib_good_nh and fib_flush
  net/ipv4: export fib_info_update_nh_saddr
  net/ipv4: export fib_check_nh
  net/ipv4: Define fib_get_nhs when CONFIG_IP_ROUTE_MULTIPATH is
    disabled
  net/ipv4: Create init and release helpers for fib_nh
  net: ipv4: Add fib_nh to fib_result
  net/ipv4: Move device validation to helper
  net/ipv6: Create init and release helpers for fib6_nh
  net/ipv6: Make fib6_nh optional at the end of fib6_info
  net: Initial nexthop code
  net/ipv4: Add nexthop helpers for ipv4 integration
  net/ipv4: Convert existing use of fib_info to new helpers
  net/ipv4: Allow routes to use nexthop objects
  net/ipv6: Use helpers to access fib6_nh data
  net/ipv6: Allow routes to use nexthop objects
  net: Add support for nexthop groups
  net/ipv4: Optimization for fib_info lookup

 .../net/ethernet/mellanox/mlxsw/spectrum_router.c  |    4 +-
 drivers/net/ethernet/rocker/rocker_ofdpa.c         |   20 +-
 include/net/addrconf.h                             |    5 +
 include/net/ip6_fib.h                              |   22 +-
 include/net/ip6_route.h                            |   12 +-
 include/net/ip_fib.h                               |   39 +-
 include/net/net_namespace.h                        |    2 +
 include/net/netns/nexthop.h                        |   18 +
 include/net/nexthop.h                              |  253 +++-
 include/net/rtnh.h                                 |   34 +
 include/trace/events/fib6.h                        |   15 +-
 include/uapi/linux/nexthop.h                       |   56 +
 include/uapi/linux/rtnetlink.h                     |    8 +
 net/core/filter.c                                  |   13 +-
 net/core/lwtunnel.c                                |    2 +-
 net/decnet/dn_fib.c                                |    2 +-
 net/ipv4/Makefile                                  |    2 +-
 net/ipv4/fib_frontend.c                            |   60 +-
 net/ipv4/fib_rules.c                               |    3 +-
 net/ipv4/fib_semantics.c                           |  433 ++++--
 net/ipv4/fib_trie.c                                |   54 +-
 net/ipv4/ipmr.c                                    |    2 +-
 net/ipv4/nexthop.c                                 | 1541 ++++++++++++++++++++
 net/ipv4/route.c                                   |   34 +-
 net/ipv6/addrconf.c                                |    5 +-
 net/ipv6/addrconf_core.c                           |    9 +
 net/ipv6/af_inet6.c                                |    1 +
 net/ipv6/ip6_fib.c                                 |   27 +-
 net/ipv6/ndisc.c                                   |   15 +-
 net/ipv6/route.c                                   |  474 +++---
 net/mpls/af_mpls.c                                 |    2 +-
 security/selinux/nlmsgtab.c                        |    5 +-
 32 files changed, 2690 insertions(+), 482 deletions(-)
 create mode 100644 include/net/netns/nexthop.h
 create mode 100644 include/net/rtnh.h
 create mode 100644 include/uapi/linux/nexthop.h
 create mode 100644 net/ipv4/nexthop.c

-- 
2.11.0

^ permalink raw reply

* [PATCH RFC net-next 15/18] net/ipv6: Use helpers to access fib6_nh data
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Similar to ipv4, add helpers for accessing fib6_nh data and convert
existing users.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip6_fib.h       | 11 ----------
 include/net/ip6_route.h     |  2 ++
 include/net/nexthop.h       | 40 +++++++++++++++++++++++++++++++++++
 include/trace/events/fib6.h |  2 +-
 net/core/filter.c           | 11 +++++++---
 net/ipv6/route.c            | 51 ++++++++++++++++++++++++++-------------------
 6 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 9526eef711d5..1f04a26e4c65 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -424,17 +424,6 @@ static inline void fib6_nh_release(struct fib6_nh *fib6_nh)
 	lwtstate_put(fib6_nh->nh_lwtstate);
 }
 
-static inline struct net_device *fib6_info_nh_dev(const struct fib6_info *f6i)
-{
-	return f6i->fib6_nh->nh_dev;
-}
-
-static inline
-struct lwtunnel_state *fib6_info_nh_lwt(const struct fib6_info *f6i)
-{
-	return f6i->fib6_nh->nh_lwtstate;
-}
-
 void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
 		     unsigned int flags);
 
diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index b1ca637acb2a..0cdfe176c530 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -2,6 +2,8 @@
 #ifndef _NET_IP6_ROUTE_H
 #define _NET_IP6_ROUTE_H
 
+#include <net/nexthop.h>
+
 struct route_info {
 	__u8			type;
 	__u8			length;
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index c149fe8394ab..dae1518af3f3 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -160,6 +160,46 @@ static inline __be32 fib_info_nh_gw(struct fib_info *fi)
 	return fib_nh ? fib_nh->nh_gw : 0;
 }
 
+/* IPv6 variants
+ */
+static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
+{
+	struct nh_info *nhi;
+
+	nhi = rcu_dereference(nh->nh_info);
+	if (nhi->family == AF_INET6)
+		return &nhi->fib6_nh;
+
+	return NULL;
+}
+
+static inline struct fib6_nh *fib6_info_nh(struct fib6_info *f6i)
+{
+	return f6i->fib6_nh;
+}
+
+static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
+{
+	struct fib6_nh *fib6_nh = fib6_info_nh(f6i);
+
+	return fib6_nh ? fib6_nh->nh_dev : NULL;
+}
+
+static inline struct in6_addr *fib6_info_nh_gw(struct fib6_info *f6i)
+{
+	struct fib6_nh *fib6_nh = fib6_info_nh(f6i);
+
+	return fib6_nh ? &fib6_nh->nh_gw : NULL;
+}
+
+static inline
+struct lwtunnel_state *fib6_info_nh_lwt(struct fib6_info *f6i)
+{
+	struct fib6_nh *fib6_nh = fib6_info_nh(f6i);
+
+	return fib6_nh ? fib6_nh->nh_lwtstate : NULL;
+}
+
 int fib_check_nexthop(struct fib_info *fi, struct fib_config *cfg,
 		      struct netlink_ext_ack *extack);
 
diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 037df3d2be0b..4e5e36cc35b9 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -36,7 +36,7 @@ TRACE_EVENT(fib6_table_lookup,
 	),
 
 	TP_fast_assign(
-		struct fib6_nh *fib6_nh = f6i->fib6_nh;
+		struct fib6_nh *fib6_nh = fib6_info_nh(f6i);
 		struct in6_addr *in6;
 
 		__entry->tb_id = table->tb6_id;
diff --git a/net/core/filter.c b/net/core/filter.c
index bc979edf06ca..4d227fae69c8 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4340,6 +4340,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 {
 	struct in6_addr *src = (struct in6_addr *) params->ipv6_src;
 	struct in6_addr *dst = (struct in6_addr *) params->ipv6_dst;
+	struct fib6_nh *fib6_nh;
 	struct neighbour *neigh;
 	struct net_device *dev;
 	struct inet6_dev *idev;
@@ -4428,13 +4429,17 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	if (f6i->fib6_nh->nh_lwtstate)
+	fib6_nh = fib6_info_nh(f6i);
+	if (!fib6_nh)
+		return BPF_FIB_LKUP_RET_NOT_FWDED;
+
+	if (fib6_nh->nh_lwtstate)
 		return BPF_FIB_LKUP_RET_UNSUPP_LWT;
 
 	if (f6i->fib6_flags & RTF_GATEWAY)
-		*dst = f6i->fib6_nh->nh_gw;
+		*dst = fib6_nh->nh_gw;
 
-	dev = f6i->fib6_nh->nh_dev;
+	dev = fib6_nh->nh_dev;
 	params->rt_metric = f6i->fib6_metric;
 
 	/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 5792f57fdb91..2c140ce95eb4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -533,8 +533,8 @@ static void rt6_probe(struct fib6_info *rt)
 	if (!rt || !(rt->fib6_flags & RTF_GATEWAY))
 		return;
 
-	nh_gw = &rt->fib6_nh->nh_gw;
-	dev = rt->fib6_nh->nh_dev;
+	nh_gw = fib6_info_nh_gw(rt);
+	dev = fib6_info_nh_dev(rt);
 	rcu_read_lock_bh();
 	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
@@ -580,9 +580,9 @@ static inline void rt6_probe(struct fib6_info *rt)
  */
 static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 {
-	const struct net_device *dev = rt->fib6_nh->nh_dev;
+	const struct net_device *dev = fib6_info_nh_dev(rt);
 
-	if (!oif || dev->ifindex == oif)
+	if (!oif || (dev && dev->ifindex == oif))
 		return 2;
 	return 0;
 }
@@ -590,6 +590,8 @@ static inline int rt6_check_dev(struct fib6_info *rt, int oif)
 static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 {
 	enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
+	const struct in6_addr *nh_gw = fib6_info_nh_gw(rt);
+	struct net_device *dev = fib6_info_nh_dev(rt);
 	struct neighbour *neigh;
 
 	if (rt->fib6_flags & RTF_NONEXTHOP ||
@@ -597,8 +599,7 @@ static inline enum rt6_nud_state rt6_check_neigh(struct fib6_info *rt)
 		return RT6_NUD_SUCCEED;
 
 	rcu_read_lock_bh();
-	neigh = __ipv6_neigh_lookup_noref(rt->fib6_nh->nh_dev,
-					  &rt->fib6_nh->nh_gw);
+	neigh = __ipv6_neigh_lookup_noref(dev, nh_gw);
 	if (neigh) {
 		read_lock(&neigh->lock);
 		if (neigh->nud_state & NUD_VALID)
@@ -638,12 +639,12 @@ static int rt6_score_route(struct fib6_info *rt, int oif, int strict)
 }
 
 /* called with rc_read_lock held */
-// TO-DO: if (!f6i->nh)
 static inline bool fib6_ignore_linkdown(const struct fib6_info *f6i)
 {
-	const struct net_device *dev = fib6_info_nh_dev(f6i);
+	const struct net_device *dev;
 	bool rc = false;
 
+	dev = f6i->fib6_nh->nh_dev;
 	if (dev) {
 		const struct inet6_dev *idev = __in6_dev_get(dev);
 
@@ -869,7 +870,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 /* called with rcu_lock held */
 static struct net_device *ip6_rt_get_dev_rcu(struct fib6_info *rt)
 {
-	struct net_device *dev = rt->fib6_nh->nh_dev;
+	struct net_device *dev = fib6_info_nh_dev(rt);
 
 	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)) {
 		/* for copies of local routes, dst->dev needs to be the
@@ -947,6 +948,8 @@ static void ip6_rt_init_dst_reject(struct rt6_info *rt, struct fib6_info *ort)
 
 static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 {
+	struct lwtunnel_state *lws;
+
 	rt->dst.flags |= fib6_info_dst_flags(ort);
 
 	if (ort->fib6_flags & RTF_REJECT) {
@@ -965,8 +968,9 @@ static void ip6_rt_init_dst(struct rt6_info *rt, struct fib6_info *ort)
 		rt->dst.input = ip6_forward;
 	}
 
-	if (ort->fib6_nh->nh_lwtstate) {
-		rt->dst.lwtstate = lwtstate_get(ort->fib6_nh->nh_lwtstate);
+	lws = fib6_info_nh_lwt(ort);
+	if (lws) {
+		rt->dst.lwtstate = lwtstate_get(lws);
 		lwtunnel_set_redirect(&rt->dst);
 	}
 
@@ -985,19 +989,20 @@ static void rt6_set_from(struct rt6_info *rt, struct fib6_info *from)
 static void ip6_rt_copy_init(struct rt6_info *rt, struct fib6_info *ort)
 {
 	struct net_device *dev = fib6_info_nh_dev(ort);
+	struct fib6_nh *fib6_nh = fib6_info_nh(ort);
 
 	ip6_rt_init_dst(rt, ort);
 
 	rt->rt6i_dst = ort->fib6_dst;
 	rt->rt6i_idev = dev ? in6_dev_get(dev) : NULL;
-	rt->rt6i_gateway = ort->fib6_nh->nh_gw;
+	rt->rt6i_gateway = fib6_nh->nh_gw;
 	rt->rt6i_flags = ort->fib6_flags;
 	rt6_set_from(rt, ort);
 #ifdef CONFIG_IPV6_SUBTREES
 	rt->rt6i_src = ort->fib6_src;
 #endif
 	rt->rt6i_prefsrc = ort->fib6_prefsrc;
-	rt->dst.lwtstate = lwtstate_get(ort->fib6_nh->nh_lwtstate);
+	rt->dst.lwtstate = lwtstate_get(fib6_nh->nh_lwtstate);
 }
 
 static struct fib6_node* fib6_backtrack(struct fib6_node *fn,
@@ -1039,7 +1044,7 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
 static struct rt6_info *ip6_create_rt_rcu(struct fib6_info *rt)
 {
 	unsigned short flags = fib6_info_dst_flags(rt);
-	struct net_device *dev = rt->fib6_nh->nh_dev;
+	struct net_device *dev = fib6_info_nh_dev(rt);
 	struct rt6_info *nrt;
 
 	if (!fib6_info_hold_safe(rt))
@@ -1392,8 +1397,9 @@ __rt6_find_exception_rcu(struct rt6_exception_bucket **bucket,
 	return NULL;
 }
 
-static unsigned int fib6_mtu(const struct fib6_info *rt)
+static unsigned int fib6_mtu(struct fib6_info *rt)
 {
+	struct lwtunnel_state *lws = fib6_info_nh_lwt(rt);
 	unsigned int mtu;
 
 	if (rt->fib6_pmtu) {
@@ -1410,7 +1416,7 @@ static unsigned int fib6_mtu(const struct fib6_info *rt)
 
 	mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
 
-	return mtu - lwtunnel_headroom(rt->fib6_nh->nh_lwtstate, mtu);
+	return mtu - lwtunnel_headroom(lws, mtu);
 }
 
 static int rt6_insert_exception(struct rt6_info *nrt,
@@ -2454,7 +2460,9 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 	fn = fib6_node_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
 	for_each_fib6_node_rt_rcu(fn) {
-		if (rt->fib6_nh->nh_flags & RTNH_F_DEAD)
+		struct fib6_nh *fib6_nh = fib6_info_nh(rt);
+
+		if (fib6_nh->nh_flags & RTNH_F_DEAD)
 			continue;
 		if (fib6_check_expired(rt))
 			continue;
@@ -2462,14 +2470,14 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
 			break;
 		if (!(rt->fib6_flags & RTF_GATEWAY))
 			continue;
-		if (fl6->flowi6_oif != rt->fib6_nh->nh_dev->ifindex)
+		if (fl6->flowi6_oif != fib6_nh->nh_dev->ifindex)
 			continue;
 		/* rt_cache's gateway might be different from its 'parent'
 		 * in the case of an ip redirect.
 		 * So we keep searching in the exception table if the gateway
 		 * is different.
 		 */
-		if (!ipv6_addr_equal(&rdfl->gateway, &rt->fib6_nh->nh_gw)) {
+		if (!ipv6_addr_equal(&rdfl->gateway, &fib6_nh->nh_gw)) {
 			rt_cache = rt6_find_cached_rt(rt,
 						      &fl6->daddr,
 						      &fl6->saddr);
@@ -3804,8 +3812,9 @@ static int fib6_remove_prefsrc(struct fib6_info *rt, void *arg)
 	struct net_device *dev = ((struct arg_dev_net_ip *)arg)->dev;
 	struct net *net = ((struct arg_dev_net_ip *)arg)->net;
 	struct in6_addr *addr = ((struct arg_dev_net_ip *)arg)->addr;
+	struct net_device *nh_dev = fib6_info_nh_dev(rt);
 
-	if (((void *)rt->fib6_nh->nh_dev == dev || !dev) &&
+	if ((nh_dev == dev || !dev) &&
 	    rt != net->ipv6.fib6_null_entry &&
 	    ipv6_addr_equal(addr, &rt->fib6_prefsrc.addr)) {
 		spin_lock_bh(&rt6_exception_lock);
@@ -4127,7 +4136,7 @@ static int rt6_mtu_change_route(struct fib6_info *rt, void *p_arg)
 	   Since RFC 1981 doesn't include administrative MTU increase
 	   update PMTU increase is a MUST. (i.e. jumbo frame)
 	 */
-	if (rt->fib6_nh->nh_dev == arg->dev &&
+	if (fib6_info_nh_dev(rt) == arg->dev &&
 	    !fib6_metric_locked(rt, RTAX_MTU)) {
 		u32 mtu = rt->fib6_pmtu;
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 07/18] net: ipv4: Add fib_nh to fib_result
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Add nexthop selection to fib_result and update FIB_RES macros to
use it. Right now, fib_nh in fib_result will point to a nexthop
within a fib_info. Later, fib_nh can point to data with a nexthop.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip_fib.h     | 21 +++++++++------------
 net/core/filter.c        |  2 +-
 net/ipv4/fib_frontend.c  |  4 ++--
 net/ipv4/fib_semantics.c |  4 ++--
 net/ipv4/fib_trie.c      |  4 ++--
 net/ipv4/route.c         | 18 +++++++++---------
 6 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index ce9b92485064..0b40c59b8a5f 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -141,6 +141,7 @@ struct fib_result {
 	unsigned char	type;
 	unsigned char	scope;
 	u32		tclassid;
+	struct fib_nh	*nh;
 	struct fib_info *fi;
 	struct fib_table *table;
 	struct hlist_head *fa_head;
@@ -161,11 +162,7 @@ struct fib_result_nl {
 	int             err;
 };
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-#define FIB_RES_NH(res)		((res).fi->fib_nh[(res).nh_sel])
-#else /* CONFIG_IP_ROUTE_MULTIPATH */
-#define FIB_RES_NH(res)		((res).fi->fib_nh[0])
-#endif /* CONFIG_IP_ROUTE_MULTIPATH */
+#define FIB_RES_NH(res)		((res).nh)
 
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 #define FIB_TABLE_HASHSZ 256
@@ -177,13 +174,13 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh,
 				unsigned char scope);
 
 #define FIB_RES_SADDR(net, res)				\
-	((FIB_RES_NH(res).nh_saddr_genid ==		\
+	((FIB_RES_NH(res)->nh_saddr_genid ==		\
 	  atomic_read(&(net)->ipv4.dev_addr_genid)) ?	\
-	 FIB_RES_NH(res).nh_saddr :			\
-	 fib_info_update_nh_saddr((net), &FIB_RES_NH(res), (res).fi->fib_scope))
-#define FIB_RES_GW(res)			(FIB_RES_NH(res).nh_gw)
-#define FIB_RES_DEV(res)		(FIB_RES_NH(res).nh_dev)
-#define FIB_RES_OIF(res)		(FIB_RES_NH(res).nh_oif)
+	 FIB_RES_NH(res)->nh_saddr :			\
+	 fib_info_update_nh_saddr((net), FIB_RES_NH(res), (res).fi->fib_scope))
+#define FIB_RES_GW(res)			(FIB_RES_NH(res)->nh_gw)
+#define FIB_RES_DEV(res)		(FIB_RES_NH(res)->nh_dev)
+#define FIB_RES_OIF(res)		(FIB_RES_NH(res)->nh_oif)
 
 #define FIB_RES_PREFSRC(net, res)	((res).fi->fib_prefsrc ? : \
 					 FIB_RES_SADDR(net, res))
@@ -422,7 +419,7 @@ static inline void fib_combine_itag(u32 *itag, const struct fib_result *res)
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	u32 rtag;
 #endif
-	*itag = FIB_RES_NH(*res).nh_tclassid<<16;
+	*itag = FIB_RES_NH(*res)->nh_tclassid << 16;
 #ifdef CONFIG_IP_MULTIPLE_TABLES
 	rtag = res->tclassid;
 	if (*itag == 0)
diff --git a/net/core/filter.c b/net/core/filter.c
index c25eb36f1320..0ba4c477415d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4311,7 +4311,7 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
 			return BPF_FIB_LKUP_RET_FRAG_NEEDED;
 	}
 
-	nh = &res.fi->fib_nh[res.nh_sel];
+	nh = res.nh;
 
 	/* do not handle lwt encaps right now */
 	if (nh->nh_lwtstate)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b0910d8c8bd4..2f9bf1ec2678 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -380,7 +380,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		dev_match = true;
 #endif
 	if (dev_match) {
-		ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+		ret = FIB_RES_NH(res)->nh_scope >= RT_SCOPE_HOST;
 		return ret;
 	}
 	if (no_addr)
@@ -392,7 +392,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	ret = 0;
 	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
 		if (res.type == RTN_UNICAST)
-			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
+			ret = FIB_RES_NH(res)->nh_scope >= RT_SCOPE_HOST;
 	}
 	return ret;
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0d792666821a..53e38ecfdd58 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1728,7 +1728,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
 			if (!fib_good_nh(nh))
 				continue;
 			if (!first) {
-				res->nh_sel = nhsel;
+				res->nh = &fi->fib_nh[nhsel];
 				first = true;
 			}
 		}
@@ -1736,7 +1736,7 @@ void fib_select_multipath(struct fib_result *res, int hash)
 		if (hash > atomic_read(&nh->nh_upper_bound))
 			continue;
 
-		res->nh_sel = nhsel;
+		res->nh = &fi->fib_nh[nhsel];
 		return;
 	} endfor_nexthops(fi);
 }
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 5bc0c89e81e4..51e7b38f3a7b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1468,7 +1468,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		if (fi->fib_flags & RTNH_F_DEAD)
 			continue;
 		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-			const struct fib_nh *nh = &fi->fib_nh[nhsel];
+			struct fib_nh *nh = &fi->fib_nh[nhsel];
 			struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
 
 			if (nh->nh_flags & RTNH_F_DEAD)
@@ -1489,7 +1489,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 
 			res->prefix = htonl(n->key);
 			res->prefixlen = KEYLENGTH - fa->fa_slen;
-			res->nh_sel = nhsel;
+			res->nh = nh;
 			res->type = fa->fa_type;
 			res->scope = fi->fib_scope;
 			res->fi = fi;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index b678466da451..1297c7c934a8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -776,7 +776,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 			neigh_event_send(n, NULL);
 		} else {
 			if (fib_lookup(net, fl4, &res, 0) == 0) {
-				struct fib_nh *nh = &FIB_RES_NH(res);
+				struct fib_nh *nh = FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
 						0, false,
@@ -1021,7 +1021,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 
 	rcu_read_lock();
 	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
-		struct fib_nh *nh = &FIB_RES_NH(res);
+		struct fib_nh *nh = FIB_RES_NH(res);
 
 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock,
 				      jiffies + ip_rt_mtu_expires);
@@ -1350,7 +1350,7 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
 u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr)
 {
 	struct fib_info *fi = res->fi;
-	struct fib_nh *nh = &fi->fib_nh[res->nh_sel];
+	struct fib_nh *nh = res->nh;
 	struct net_device *dev = nh->nh_dev;
 	u32 mtu = 0;
 
@@ -1527,7 +1527,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
 	bool cached = false;
 
 	if (fi) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh *nh = FIB_RES_NH(*res);
 
 		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
 			rt->rt_gateway = nh->nh_gw;
@@ -1744,12 +1744,12 @@ static int __mkroute_input(struct sk_buff *skb,
 		}
 	}
 
-	fnhe = find_exception(&FIB_RES_NH(*res), daddr);
+	fnhe = find_exception(FIB_RES_NH(*res), daddr);
 	if (do_cache) {
 		if (fnhe)
 			rth = rcu_dereference(fnhe->fnhe_rth_input);
 		else
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			rth = rcu_dereference(FIB_RES_NH(*res)->nh_rth_input);
 		if (rt_cache_valid(rth)) {
 			skb_dst_set_noref(skb, &rth->dst);
 			goto out;
@@ -2039,7 +2039,7 @@ out:	return err;
 	do_cache = false;
 	if (res->fi) {
 		if (!itag) {
-			rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
+			rth = rcu_dereference(FIB_RES_NH(*res)->nh_rth_input);
 			if (rt_cache_valid(rth)) {
 				skb_dst_set_noref(skb, &rth->dst);
 				err = 0;
@@ -2069,7 +2069,7 @@ out:	return err;
 	}
 
 	if (do_cache) {
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh *nh = FIB_RES_NH(*res);
 
 		rth->dst.lwtstate = lwtstate_get(nh->nh_lwtstate);
 		if (lwtunnel_input_redirect(rth->dst.lwtstate)) {
@@ -2249,7 +2249,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 	do_cache &= fi != NULL;
 	if (fi) {
 		struct rtable __rcu **prth;
-		struct fib_nh *nh = &FIB_RES_NH(*res);
+		struct fib_nh *nh = FIB_RES_NH(*res);
 
 		fnhe = find_exception(nh, fl4->daddr);
 		if (!do_cache)
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 14/18] net/ipv4: Allow routes to use nexthop objects
From: dsahern @ 2018-09-01  0:49 UTC (permalink / raw)
  To: netdev; +Cc: roopa, sharpd, idosch, davem, David Ahern
In-Reply-To: <20180901004954.7145-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Add new RTA attribute to allow a user to specify a nexthop id to use
with a route instead of the current nexthop specification.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/ip_fib.h           |   1 +
 include/uapi/linux/rtnetlink.h |   1 +
 net/ipv4/fib_frontend.c        |   7 +++
 net/ipv4/fib_semantics.c       | 139 ++++++++++++++++++++++++++++++-----------
 net/ipv4/fib_trie.c            |  33 +++++++---
 5 files changed, 136 insertions(+), 45 deletions(-)

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index c59e0f1ba59b..d2f961de732d 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -40,6 +40,7 @@ struct fib_config {
 	u32			fc_flags;
 	u32			fc_priority;
 	__be32			fc_prefsrc;
+	u32			fc_nh_id;
 	struct nlattr		*fc_mx;
 	struct rtnexthop	*fc_mp;
 	int			fc_mx_len;
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a0615797e5e..a036368798a9 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -349,6 +349,7 @@ enum rtattr_type_t {
 	RTA_IP_PROTO,
 	RTA_SPORT,
 	RTA_DPORT,
+	RTA_NH_ID,
 	__RTA_MAX
 };
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c483453bf037..cf133d4e02f2 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -322,6 +322,9 @@ static bool fib_info_nh_uses_dev(struct fib_info *fi,
 	bool dev_match = false;
 	int ret;
 
+	if (fi->nh)
+		return nexthop_uses_dev(fi->nh, dev);
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	for (ret = 0; ret < fi->fib_nhs; ret++) {
 		struct fib_nh *nh = &fi->fib_nh[ret];
@@ -663,6 +666,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
 	[RTA_IP_PROTO]		= { .type = NLA_U8 },
 	[RTA_SPORT]		= { .type = NLA_U16 },
 	[RTA_DPORT]		= { .type = NLA_U16 },
+	[RTA_NH_ID]		= { .type = NLA_U32 },
 };
 
 static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
@@ -746,6 +750,9 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
 			if (err < 0)
 				goto errout;
 			break;
+		case RTA_NH_ID:
+			cfg->fc_nh_id = nla_get_u32(attr);
+			break;
 		}
 	}
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 0cd536ad1761..c91cdafd40ec 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -226,9 +226,13 @@ static void free_fib_info_rcu(struct rcu_head *head)
 	struct fib_info *fi = container_of(head, struct fib_info, rcu);
 	struct dst_metrics *m;
 
-	change_nexthops(fi) {
-		fib_nh_release(fi->fib_net, nexthop_nh);
-	} endfor_nexthops(fi);
+	if (fi->nh) {
+		nexthop_put(fi->nh);
+	} else {
+		change_nexthops(fi) {
+			fib_nh_release(fi->fib_net, nexthop_nh);
+		} endfor_nexthops(fi);
+	}
 
 	m = fi->fib_metrics;
 	if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt))
@@ -260,11 +264,15 @@ void fib_release_info(struct fib_info *fi)
 		hlist_del(&fi->fib_hash);
 		if (fi->fib_prefsrc)
 			hlist_del(&fi->fib_lhash);
-		change_nexthops(fi) {
-			if (!nexthop_nh->nh_dev)
-				continue;
-			hlist_del(&nexthop_nh->nh_hash);
-		} endfor_nexthops(fi)
+		if (fi->nh) {
+			list_del(&fi->nh_list);
+		} else {
+			change_nexthops(fi) {
+				if (!nexthop_nh->nh_dev)
+					continue;
+				hlist_del(&nexthop_nh->nh_hash);
+			} endfor_nexthops(fi)
+		}
 		fi->fib_dead = 1;
 		fib_info_put(fi);
 	}
@@ -275,6 +283,12 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 {
 	const struct fib_nh *onh = ofi->fib_nh;
 
+	if (fi->nh || ofi->nh)
+		return nexthop_cmp(fi->nh, ofi->nh) ? 0 : -1;
+
+	if (ofi->fib_nhs == 0)
+		return 0;
+
 	for_nexthops(fi) {
 		if (nh->nh_oif != onh->nh_oif ||
 		    nh->nh_gw  != onh->nh_gw ||
@@ -310,10 +324,13 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
 	val ^= (fi->fib_protocol << 8) | fi->fib_scope;
 	val ^= (__force u32)fi->fib_prefsrc;
 	val ^= fi->fib_priority;
-	for_nexthops(fi) {
-		val ^= fib_devindex_hashfn(nh->nh_oif);
-	} endfor_nexthops(fi)
-
+	if (fi->nh) {
+		val ^= fib_devindex_hashfn(fi->nh->id);
+	} else {
+		for_nexthops(fi) {
+			val ^= fib_devindex_hashfn(nh->nh_oif);
+		} endfor_nexthops(fi)
+	}
 	return (val ^ (val >> 7) ^ (val >> 12)) & mask;
 }
 
@@ -339,7 +356,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
 			   sizeof(u32) * RTAX_MAX) == 0 &&
 		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
-		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
+		    (nh_comp(fi, nfi) == 0))
 			return fi;
 	}
 
@@ -349,6 +366,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 /* Check, that the gateway is already configured.
  * Used only by redirect accept routine.
  */
+//TO-DO: need a nexthop version
 int ip_fib_check_default(__be32 gw, struct net_device *dev)
 {
 	struct hlist_head *head;
@@ -381,16 +399,19 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi)
 			 + nla_total_size(4) /* RTA_PRIORITY */
 			 + nla_total_size(4) /* RTA_PREFSRC */
 			 + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+	size_t nhsize = 0;
 
 	/* space for nested metrics */
 	payload += nla_total_size((RTAX_MAX * nla_total_size(4)));
 
-	if (fi->fib_nhs) {
+	if (fi->nh) {
+		nhsize = nla_total_size(4); /* RTA_NH_ID */
+	} else if (fi->fib_nhs) {
 		size_t nh_encapsize = 0;
 		/* Also handles the special case fib_nhs == 1 */
 
 		/* each nexthop is packed in an attribute */
-		size_t nhsize = nla_total_size(sizeof(struct rtnexthop));
+		nhsize = nla_total_size(sizeof(struct rtnexthop));
 
 		/* may contain flow and gateway attribute */
 		nhsize += 2 * nla_total_size(4);
@@ -539,6 +560,7 @@ static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining,
 	return nhs;
 }
 
+/* only called when fib_nh is integrated into fib_info */
 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
 		       int remaining, struct fib_config *cfg,
 		       struct netlink_ext_ack *extack)
@@ -625,6 +647,8 @@ static void fib_rebalance(struct fib_info *fi)
 	int w;
 	struct in_device *in_dev;
 
+	WARN_ON(fi->nh);
+
 	if (fi->fib_nhs < 2)
 		return;
 
@@ -712,6 +736,9 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi,
 	if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
 		return 1;
 
+	if (fi->nh)
+		return cfg->fc_nh_id == fi->nh->id ? 0 : 1;
+
 	if (cfg->fc_oif || cfg->fc_gw) {
 		if (cfg->fc_encap) {
 			if (fib_encap_match(cfg->fc_encap_type, cfg->fc_encap,
@@ -1099,9 +1126,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 {
 	int err;
 	struct fib_info *fi = NULL;
+	struct nexthop *nh = NULL;
 	struct fib_info *ofi;
 	int nhs = 1;
 	struct net *net = cfg->fc_nlinfo.nl_net;
+	unsigned char scope;
 
 	if (cfg->fc_type > RTN_MAX)
 		goto err_inval;
@@ -1118,6 +1147,21 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		goto err_inval;
 	}
 
+	if (cfg->fc_nh_id) {
+		if (cfg->fc_oif || cfg->fc_gw || cfg->fc_encap || cfg->fc_mp) {
+			NL_SET_ERR_MSG(extack,
+				       "Nexthop specification and nexthop id are mutually exclusive");
+			goto err_inval;
+		}
+
+		nh = nexthop_find_by_id(net, cfg->fc_nh_id);
+		if (!nh) {
+			NL_SET_ERR_MSG(extack,
+				       "Invalid nexthop id - nexthop does not exist");
+			goto err_inval;
+		}
+	}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (cfg->fc_mp) {
 		nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len, extack);
@@ -1180,7 +1224,10 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 	if (err)
 		goto failure;
 
-	if (cfg->fc_mp) {
+	if (nh) {
+		nexthop_get(nh);
+		fi->nh = nh;
+	} else if (cfg->fc_mp) {
 		err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg, extack);
 		if (err != 0)
 			goto failure;
@@ -1214,7 +1261,11 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		goto err_inval;
 	}
 
-	if (cfg->fc_scope == RT_SCOPE_HOST) {
+	if (fi->nh) {
+		err = fib_check_nexthop(fi, cfg, extack);
+		if (err)
+			goto failure;
+	} else if (cfg->fc_scope == RT_SCOPE_HOST) {
 		struct fib_nh *nh = fi->fib_nh;
 
 		/* Local address is added. */
@@ -1254,12 +1305,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		goto err_inval;
 	}
 
-	change_nexthops(fi) {
-		fib_info_update_nh_saddr(net, nexthop_nh, fi->fib_scope);
-	} endfor_nexthops(fi)
-
-	fib_rebalance(fi);
+	if (!fi->nh) {
+		scope = fi->fib_scope;
+		change_nexthops(fi) {
+			fib_info_update_nh_saddr(net, nexthop_nh, scope);
+		} endfor_nexthops(fi)
 
+		fib_rebalance(fi);
+	}
 link_it:
 	ofi = fib_find_info(fi);
 	if (ofi) {
@@ -1280,16 +1333,20 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 		head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
 		hlist_add_head(&fi->fib_lhash, head);
 	}
-	change_nexthops(fi) {
-		struct hlist_head *head;
-		unsigned int hash;
+	if (fi->nh) {
+		list_add(&fi->nh_list, &nh->fi_list);
+	} else {
+		change_nexthops(fi) {
+			struct hlist_head *head;
+			unsigned int hash;
 
-		if (!nexthop_nh->nh_dev)
-			continue;
-		hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
-		head = &fib_info_devhash[hash];
-		hlist_add_head(&nexthop_nh->nh_hash, head);
-	} endfor_nexthops(fi)
+			if (!nexthop_nh->nh_dev)
+				continue;
+			hash = fib_devindex_hashfn(nexthop_nh->nh_dev->ifindex);
+			head = &fib_info_devhash[hash];
+			hlist_add_head(&nexthop_nh->nh_hash, head);
+		} endfor_nexthops(fi)
+	}
 	spin_unlock_bh(&fib_info_lock);
 	return fi;
 
@@ -1298,6 +1355,9 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
 
 failure:
 	if (fi) {
+		if (fi->nh)
+			nexthop_put(fi->nh);
+
 		fi->fib_dead = 1;
 		free_fib_info(fi);
 	}
@@ -1344,7 +1404,11 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	if (fi->fib_prefsrc &&
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
-	if (fi->fib_nhs == 1) {
+
+	if (fi->nh) {
+		if (nla_put_u32(skb, RTA_NH_ID, fi->nh->id))
+			goto nla_put_failure;
+	} else if (fi->fib_nhs == 1) {
 		if (fi->fib_nh->nh_gw &&
 		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
 			goto nla_put_failure;
@@ -1587,8 +1651,11 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 				continue;
 			break;
 		}
-		if (next_fi->fib_flags & RTNH_F_DEAD)
+
+		fnh = fib_info_nh(next_fi, 0);
+		if (fnh->nh_flags & RTNH_F_DEAD)
 			continue;
+
 		last_tos = fa->fa_tos;
 		last_prio = next_fi->fib_priority;
 
@@ -1596,7 +1663,6 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
 		    fa->fa_type != RTN_UNICAST)
 			continue;
 
-		fnh = fib_info_nh(next_fi, 0);
 		if (!fnh->nh_gw || fnh->nh_scope != RT_SCOPE_LINK)
 			continue;
 
@@ -1749,13 +1815,14 @@ void fib_select_multipath(struct fib_result *res, int hash)
 void fib_select_path(struct net *net, struct fib_result *res,
 		     struct flowi4 *fl4, const struct sk_buff *skb)
 {
+	int h;
+
 	if (fl4->flowi4_oif && !(fl4->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF))
 		goto check_saddr;
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res->fi->fib_nhs > 1) {
-		int h = fib_multipath_hash(net, fl4, skb, NULL);
-
+		h = fib_multipath_hash(net, fl4, skb, NULL);
 		fib_select_multipath(res, h);
 	}
 	else
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index c6aab049a4ac..575bb34d895f 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1445,7 +1445,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 	/* Step 3: Process the leaf, if that fails fall back to backtracing */
 	hlist_for_each_entry_rcu(fa, &n->leaf, fa_list) {
 		struct fib_info *fi = fa->fa_info;
-		int nhsel, err;
+		int nhsel, err, nhmax;
 
 		if ((BITS_PER_LONG > KEYLENGTH) || (fa->fa_slen < KEYLENGTH)) {
 			if (index >= (1ul << fa->fa_slen))
@@ -1460,6 +1460,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		fib_alias_accessed(fa);
 		err = fib_props[fa->fa_type].error;
 		if (unlikely(err < 0)) {
+out_reject:
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
@@ -1468,17 +1469,31 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 		}
 		if (fi->fib_flags & RTNH_F_DEAD)
 			continue;
-		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
-			struct fib_nh *nh = &fi->fib_nh[nhsel];
-			struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
+
+		if (fi->nh) {
+			if (nexthop_is_blackhole(fi->nh)) {
+				err = fib_props[RTN_BLACKHOLE].error;
+				goto out_reject;
+			}
+			nhmax = nexthop_num_path(fi->nh);
+		} else {
+			nhmax = fi->fib_nhs;
+		}
+		for (nhsel = 0; nhsel < nhmax; nhsel++) {
+			struct fib_nh *nh = fib_info_nh(fi, nhsel);
+			struct in_device *in_dev;
 
 			if (nh->nh_flags & RTNH_F_DEAD)
 				continue;
-			if (in_dev &&
-			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
-			    nh->nh_flags & RTNH_F_LINKDOWN &&
-			    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
-				continue;
+
+			if (!fi->nh) {
+				in_dev = __in_dev_get_rcu(nh->nh_dev);
+				if (in_dev &&
+				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+				    nh->nh_flags & RTNH_F_LINKDOWN &&
+				    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+					continue;
+			}
 			if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) {
 				if (flp->flowi4_oif &&
 				    flp->flowi4_oif != nh->nh_oif)
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net-next v2] net/tls: Add support for async decryption of tls records
From: David Miller @ 2018-09-01  1:00 UTC (permalink / raw)
  To: vakul.garg; +Cc: netdev, borisp, aviadye, davejwatson
In-Reply-To: <20180829095655.31963-1-vakul.garg@nxp.com>

From: Vakul Garg <vakul.garg@nxp.com>
Date: Wed, 29 Aug 2018 15:26:55 +0530

> When tls records are decrypted using asynchronous acclerators such as
> NXP CAAM engine, the crypto apis return -EINPROGRESS. Presently, on
> getting -EINPROGRESS, the tls record processing stops till the time the
> crypto accelerator finishes off and returns the result. This incurs a
> context switch and is not an efficient way of accessing the crypto
> accelerators. Crypto accelerators work efficient when they are queued
> with multiple crypto jobs without having to wait for the previous ones
> to complete.
> 
> The patch submits multiple crypto requests without having to wait for
> for previous ones to complete. This has been implemented for records
> which are decrypted in zero-copy mode. At the end of recvmsg(), we wait
> for all the asynchronous decryption requests to complete.
> 
> The references to records which have been sent for async decryption are
> dropped. For cases where record decryption is not possible in zero-copy
> mode, asynchronous decryption is not used and we wait for decryption
> crypto api to complete.
> 
> For crypto requests executing in async fashion, the memory for
> aead_request, sglists and skb etc is freed from the decryption
> completion handler. The decryption completion handler wakesup the
> sleeping user context when recvmsg() flags that it has done sending
> all the decryption requests and there are no more decryption requests
> pending to be completed.
> 
> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> Reviewed-by: Dave Watson <davejwatson@fb.com>
> ---
> 
> Changes since v1:
> 	- Simplified recvmsg() so to drop reference to skb in case it
> 	  was submimtted for async decryption.
> 	- Modified tls_sw_advance_skb() to handle case when input skb is
> 	  NULL.

Applied.

^ permalink raw reply

* Re: [PATCH net-next v1] selftests/tls: Add test for recv(PEEK) spanning across multiple records
From: David Miller @ 2018-09-01  1:00 UTC (permalink / raw)
  To: vakul.garg; +Cc: netdev, borisp, aviadye, davejwatson
In-Reply-To: <20180829100014.32022-1-vakul.garg@nxp.com>

From: Vakul Garg <vakul.garg@nxp.com>
Date: Wed, 29 Aug 2018 15:30:14 +0530

> Added test case to receive multiple records with a single recvmsg()
> operation with a MSG_PEEK set.

Applied.

^ permalink raw reply

* [PATCH net-next] liquidio: Added delayed work for periodically updating the link statistics.
From: Felix Manlunas @ 2018-09-01  0:44 UTC (permalink / raw)
  To: davem
  Cc: netdev, raghu.vatsavayi, derek.chickles, satananda.burla,
	felix.manlunas, pradeep.nalla

From: Pradeep Nalla <pradeep.nalla@cavium.com>

Signed-off-by: Pradeep Nalla <pradeep.nalla@cavium.com>
Signed-off-by: Felix Manlunas <felix.manlunas@cavium.com>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c    | 24 +++++++++++++++-------
 drivers/net/ethernet/cavium/liquidio/lio_main.c    |  9 +++++++-
 drivers/net/ethernet/cavium/liquidio/lio_vf_main.c |  8 +++++++-
 .../net/ethernet/cavium/liquidio/liquidio_common.h |  2 ++
 drivers/net/ethernet/cavium/liquidio/octeon_iq.h   |  3 +++
 .../net/ethernet/cavium/liquidio/octeon_network.h  |  4 +++-
 6 files changed, 40 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 30b4a60..cdc26ca 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -1352,16 +1352,19 @@ octnet_nic_stats_callback(struct octeon_device *oct_dev,
 
 		resp->status = 1;
 	} else {
+		dev_err(&oct_dev->pci_dev->dev, "sc OPCODE_NIC_PORT_STATS command failed\n");
 		resp->status = -1;
 	}
 }
 
-int octnet_get_link_stats(struct net_device *netdev)
+void lio_fetch_stats(struct work_struct *work)
 {
-	struct lio *lio = GET_LIO(netdev);
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct lio *lio = wk->ctxptr;
 	struct octeon_device *oct_dev = lio->oct_dev;
 	struct octeon_soft_command *sc;
 	struct oct_nic_stats_resp *resp;
+	unsigned long time_in_jiffies;
 	int retval;
 
 	/* Alloc soft command */
@@ -1371,8 +1374,10 @@ int octnet_get_link_stats(struct net_device *netdev)
 					  sizeof(struct oct_nic_stats_resp),
 					  0);
 
-	if (!sc)
-		return -ENOMEM;
+	if (!sc) {
+		dev_err(&oct_dev->pci_dev->dev, "Soft command allocation failed\n");
+		goto lio_fetch_stats_exit;
+	}
 
 	resp = (struct oct_nic_stats_resp *)sc->virtrptr;
 	memset(resp, 0, sizeof(struct oct_nic_stats_resp));
@@ -1388,20 +1393,25 @@ int octnet_get_link_stats(struct net_device *netdev)
 	retval = octeon_send_soft_command(oct_dev, sc);
 	if (retval == IQ_SEND_FAILED) {
 		octeon_free_soft_command(oct_dev, sc);
-		return -EINVAL;
+		goto lio_fetch_stats_exit;
 	}
 
 	retval = wait_for_sc_completion_timeout(oct_dev, sc,
 						(2 * LIO_SC_MAX_TMO_MS));
 	if (retval)  {
 		dev_err(&oct_dev->pci_dev->dev, "sc OPCODE_NIC_PORT_STATS command failed\n");
-		return retval;
+		goto lio_fetch_stats_exit;
 	}
 
 	octnet_nic_stats_callback(oct_dev, sc->sc_status, sc);
 	WRITE_ONCE(sc->caller_is_done, true);
 
-	return 0;
+lio_fetch_stats_exit:
+	time_in_jiffies = msecs_to_jiffies(LIQUIDIO_NDEV_STATS_POLL_TIME_MS);
+	if (ifstate_check(lio, LIO_IFSTATE_RUNNING))
+		schedule_delayed_work(&lio->stats_wk.work, time_in_jiffies);
+
+	return;
 }
 
 int liquidio_set_speed(struct lio *lio, int speed)
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index ed5fc6e..e973662 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1841,6 +1841,12 @@ static int liquidio_open(struct net_device *netdev)
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
 
+	/* start periodical statistics fetch */
+	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
+	lio->stats_wk.ctxptr = lio;
+	schedule_delayed_work(&lio->stats_wk.work, msecs_to_jiffies
+					(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
+
 	dev_info(&oct->pci_dev->dev, "%s interface is opened\n",
 		 netdev->name);
 
@@ -1881,6 +1887,8 @@ static int liquidio_stop(struct net_device *netdev)
 		cleanup_tx_poll_fn(netdev);
 	}
 
+	cancel_delayed_work_sync(&lio->stats_wk.work);
+
 	if (lio->ptp_clock) {
 		ptp_clock_unregister(lio->ptp_clock);
 		lio->ptp_clock = NULL;
@@ -2081,7 +2089,6 @@ liquidio_get_stats64(struct net_device *netdev,
 	lstats->rx_packets = pkts;
 	lstats->rx_dropped = drop;
 
-	octnet_get_link_stats(netdev);
 	lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
 	lstats->collisions = oct->link_stats.fromhost.total_collisions;
 
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
index 9c267b4c..fe3d935 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -917,6 +917,11 @@ static int liquidio_open(struct net_device *netdev)
 	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
 	start_txqs(netdev);
 
+	INIT_DELAYED_WORK(&lio->stats_wk.work, lio_fetch_stats);
+	lio->stats_wk.ctxptr = lio;
+	schedule_delayed_work(&lio->stats_wk.work, msecs_to_jiffies
+					(LIQUIDIO_NDEV_STATS_POLL_TIME_MS));
+
 	/* tell Octeon to start forwarding packets to host */
 	send_rx_ctrl_cmd(lio, 1);
 
@@ -964,6 +969,8 @@ static int liquidio_stop(struct net_device *netdev)
 		oct->droq[0]->ops.poll_mode = 0;
 	}
 
+	cancel_delayed_work_sync(&lio->stats_wk.work);
+
 	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
 
 	return 0;
@@ -1181,7 +1188,6 @@ liquidio_get_stats64(struct net_device *netdev,
 	lstats->rx_packets = pkts;
 	lstats->rx_dropped = drop;
 
-	octnet_get_link_stats(netdev);
 	lstats->multicast = oct->link_stats.fromwire.fw_total_mcast;
 
 	/* detailed rx_errors: */
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 7407fcd..0decc72 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -250,6 +250,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 #define   OCTNET_CMD_VLAN_FILTER_ENABLE 0x1
 #define   OCTNET_CMD_VLAN_FILTER_DISABLE 0x0
 
+#define   OCTNET_CMD_FAIL 0x1
+
 #define   SEAPI_CMD_SPEED_SET           0x2
 #define   SEAPI_CMD_SPEED_GET           0x3
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index a04f36a..bebf3bd 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -378,6 +378,9 @@ int octeon_send_command(struct octeon_device *oct, u32 iq_no,
 			u32 force_db, void *cmd, void *buf,
 			u32 datasize, u32 reqtype);
 
+void octeon_dump_soft_command(struct octeon_device *oct,
+			      struct octeon_soft_command *sc);
+
 void octeon_prepare_soft_command(struct octeon_device *oct,
 				 struct octeon_soft_command *sc,
 				 u8 opcode, u8 subcode,
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index 807266e..ecd2a26 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -42,6 +42,7 @@ struct liquidio_if_cfg_resp {
 };
 
 #define LIO_IFCFG_WAIT_TIME    3000 /* In milli seconds */
+#define LIQUIDIO_NDEV_STATS_POLL_TIME_MS 200
 
 /* Structure of a node in list of gather components maintained by
  * NIC driver for each network device.
@@ -175,6 +176,7 @@ struct lio {
 	struct cavium_wq	sync_octeon_time_wq;
 
 	int netdev_uc_count;
+	struct cavium_wk stats_wk;
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
@@ -213,7 +215,7 @@ irqreturn_t liquidio_msix_intr_handler(int irq __attribute__((unused)),
 
 int octeon_setup_interrupt(struct octeon_device *oct, u32 num_ioqs);
 
-int octnet_get_link_stats(struct net_device *netdev);
+void lio_fetch_stats(struct work_struct *work);
 
 int lio_wait_for_clean_oq(struct octeon_device *oct);
 /**

^ permalink raw reply related

* Re: pull-request: bpf-next 2018-09-01
From: David Miller @ 2018-09-01  0:43 UTC (permalink / raw)
  To: daniel; +Cc: ast, netdev
In-Reply-To: <20180901000506.8736-1-daniel@iogearbox.net>

From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat,  1 Sep 2018 02:05:06 +0200

> The following pull-request contains BPF updates for your *net-next* tree.
> 
> The main changes are:
> 
> 1) Add AF_XDP zero-copy support for i40e driver (!), from Björn and Magnus.

W00t!

> 2) BPF verifier improvements by giving each register its own liveness
>    chain which allows to simplify and getting rid of skip_callee() logic,
>    from Edward.
> 
> 3) Add bpf fs pretty print support for percpu arraymap, percpu hashmap
>    and percpu lru hashmap. Also add generic percpu formatted print on
>    bpftool so the same can be dumped there, from Yonghong.
> 
> 4) Add bpf_{set,get}sockopt() helper support for TCP_SAVE_SYN and
>    TCP_SAVED_SYN options to allow reflection of tos/tclass from received
>    SYN packet, from Nikita.
> 
> 5) Misc improvements to the BPF sockmap test cases in terms of cgroup v2
>    interaction and removal of incorrect shutdown() calls, from John.
> 
> 6) Few cleanups in xdp_umem_assign_dev() and xdpsock samples, from Prashant.

Pulled, thanks Daniel!

^ permalink raw reply

* Re: [PATCH net-next] failover: remove set but not used variable 'primary_dev'
From: YueHaibing @ 2018-09-01  1:34 UTC (permalink / raw)
  To: Samudrala, Sridhar, David S. Miller, Stephen Hemminger,
	Dan Carpenter, Alexander Duyck, Jeff Kirsher, Liran Alon,
	Joao Martins
  Cc: netdev, kernel-janitors
In-Reply-To: <23e9e24a-dfec-e112-0745-d29e4742c057@intel.com>



On 2018/9/1 0:39, Samudrala, Sridhar wrote:
> On 8/30/2018 8:46 PM, YueHaibing wrote:
>> Fixes gcc '-Wunused-but-set-variable' warning:
>>
>> drivers/net/net_failover.c: In function 'net_failover_slave_unregister':
>> drivers/net/net_failover.c:598:35: warning:
>>   variable 'primary_dev' set but not used [-Wunused-but-set-variable]
> 
> Actually this gcc option found a bug.
> We need to add this check after accessing primary_dev and standby_dev.
> 
>         if (slave_dev != primary_dev && slave_dev != standby_dev)
>                 return -ENODEV;
> 
> Can you resubmit with the right fix?
> 

sure, thank you. will send v2

> 
>>
>> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
>> ---
>>   drivers/net/net_failover.c | 3 +--
>>   1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
>> index 7ae1856..e103c94e 100644
>> --- a/drivers/net/net_failover.c
>> +++ b/drivers/net/net_failover.c
>> @@ -595,12 +595,11 @@ static int net_failover_slave_pre_unregister(struct net_device *slave_dev,
>>   static int net_failover_slave_unregister(struct net_device *slave_dev,
>>                        struct net_device *failover_dev)
>>   {
>> -    struct net_device *standby_dev, *primary_dev;
>> +    struct net_device *standby_dev;
>>       struct net_failover_info *nfo_info;
>>       bool slave_is_standby;
>>         nfo_info = netdev_priv(failover_dev);
>> -    primary_dev = rtnl_dereference(nfo_info->primary_dev);
>>       standby_dev = rtnl_dereference(nfo_info->standby_dev);
>>         vlan_vids_del_by_dev(slave_dev, failover_dev);
>>
> 
> 
> .
> 

^ permalink raw reply

* Re: [PATCH] veth: add software timestamping
From: David Miller @ 2018-09-01  5:57 UTC (permalink / raw)
  To: michael; +Cc: netdev, linux-kernel
In-Reply-To: <20180829152411.19341-1-michael@walle.cc>

From: Michael Walle <michael@walle.cc>
Date: Wed, 29 Aug 2018 17:24:11 +0200

> Provide a software TX timestamp as well as the ethtool query interface
> and report the software timestamp capabilities.
> 
> Tested with "ethtool -T" and two linuxptp instances each bound to a
> tunnel endpoint.
> 
> Signed-off-by: Michael Walle <michael@walle.cc>

Applied to net-next.

^ permalink raw reply

* Re: [PATCH net v2] net: bcmgenet: use MAC link status for fixed phy
From: David Miller @ 2018-09-01  5:59 UTC (permalink / raw)
  To: opendmb; +Cc: f.fainelli, netdev, linux-kernel
In-Reply-To: <1535567269-12534-1-git-send-email-opendmb@gmail.com>

From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Aug 2018 11:27:49 -0700

> When using the fixed PHY with GENET (e.g. MOCA) the PHY link
> status can be determined from the internal link status captured
> by the MAC. This allows the PHY state machine to use the correct
> link state with the fixed PHY even if MAC link event interrupts
> are missed when the net device is opened.
> 
> Fixes: 8d88c6ebb34c ("net: bcmgenet: enable MoCA link state change detection")
> Signed-off-by: Doug Berger <opendmb@gmail.com>
> ---
> v2: increased "Fixes" sha1 to 12 digits

This doesn't apply cleanly to the net tree.

^ permalink raw reply

* Re: [PATCH v2] hv_netvsc: Fix a deadlock by getting rtnl lock earlier in netvsc_probe()
From: David Miller @ 2018-09-01  6:08 UTC (permalink / raw)
  To: decui
  Cc: kys, haiyangz, sthemmin, netdev, jopoulso, olaf, jasowang,
	linux-kernel, marcelo.cerri, apw, devel, vkuznets
In-Reply-To: <PU1P153MB0169F05DF2A05DC39B4BF360BF080@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>

From: Dexuan Cui <decui@microsoft.com>
Date: Thu, 30 Aug 2018 05:42:13 +0000

> 
> This patch fixes the race between netvsc_probe() and
> rndis_set_subchannel(), which can cause a deadlock.
> 
> These are the related 3 paths which show the deadlock:
 ...
> Before path #1 finishes, path #2 can start to run, because just before
> the "bus_probe_device(dev);" in device_add() in path #1, there is a line
> "object_uevent(&dev->kobj, KOBJ_ADD);", so systemd-udevd can
> immediately try to load hv_netvsc and hence path #2 can start to run.
> 
> Next, path #2 offloads the subchannal's initialization to a workqueue,
> i.e. path #3, so we can end up in a deadlock situation like this:
> 
> Path #2 gets the device lock, and is trying to get the rtnl lock;
> Path #3 gets the rtnl lock and is waiting for all the subchannel messages
> to be processed;
> Path #1 is trying to get the device lock, but since #2 is not releasing
> the device lock, path #1 has to sleep; since the VMBus messages are
> processed one by one, this means the sub-channel messages can't be
> procedded, so #3 has to sleep with the rtnl lock held, and finally #2
> has to sleep... Now all the 3 paths are sleeping and we hit the deadlock.
> 
> With the patch, we can make sure #2 gets both the device lock and the
> rtnl lock together, gets its job done, and releases the locks, so #1
> and #3 will not be blocked for ever.
> 
> Fixes: 8195b1396ec8 ("hv_netvsc: fix deadlock on hotplug")
> Signed-off-by: Dexuan Cui <decui@microsoft.com>

Applied and queued up for -stable.

^ permalink raw reply

* Re: [PATCH] net/rds: RDS is not Radio Data System
From: David Miller @ 2018-09-01  6:10 UTC (permalink / raw)
  To: pavel
  Cc: trivial, netdev, sowmini.varadhan, santosh.shilimkar,
	ka-cheong.poon, linux-kernel
In-Reply-To: <20180830102336.GA21063@amd>

From: Pavel Machek <pavel@ucw.cz>
Date: Thu, 30 Aug 2018 12:23:37 +0200

> Getting prompt "The RDS Protocol" (RDS) is not too helpful, and it is
> easily confused with Radio Data System (which we may want to support
> in kernel, too).
> 
> Signed-off-by: Pavel Machek <pavel@ucw.cz>
> Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
> Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>

Applied.

^ permalink raw reply

* Re: [PATCH v2] rsi: remove set but not used variable 'header_size'
From: YueHaibing @ 2018-09-01  6:54 UTC (permalink / raw)
  To: Siva Rebbagondla
  Cc: Kalle Valo, davem, linux-kernel, netdev, Amit karwar,
	pavani.muthyala, keescook, Linux Wireless
In-Reply-To: <CANGSkXTDOa-TCz2wbNh7=C9px4_T7besT6jBPzfs70kVoK+PtA@mail.gmail.com>

On 2018/9/1 11:41, Siva Rebbagondla wrote:
> On Fri, Aug 31, 2018 at 4:45 PM YueHaibing <yuehaibing@huawei.com> wrote:
>>
>> Fixes gcc '-Wunused-but-set-variable' warning:
>>
>> drivers/net/wireless/rsi/rsi_91x_hal.c: In function 'rsi_send_data_pkt':
>> drivers/net/wireless/rsi/rsi_91x_hal.c:288:5: warning:
>>  variable 'header_size' set but not used [-Wunused-but-set-variable]
>>
>> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
>> ---
>> v2: remove unused 'tx_params'
>> ---
>>  drivers/net/wireless/rsi/rsi_91x_hal.c | 4 ----
>>  1 file changed, 4 deletions(-)
>>
>> diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
>> index 01edf96..182b066 100644
>> --- a/drivers/net/wireless/rsi/rsi_91x_hal.c
>> +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
>> @@ -282,10 +282,8 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
>>         struct rsi_hw *adapter = common->priv;
>>         struct ieee80211_vif *vif;
>>         struct ieee80211_tx_info *info;
>> -       struct skb_info *tx_params;
>>         struct ieee80211_bss_conf *bss;
>>         int status = -EINVAL;
>> -       u8 header_size;
>>
>>         if (!skb)
>>                 return 0;
>> @@ -297,8 +295,6 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
>>                 goto err;
>>         vif = info->control.vif;
>>         bss = &vif->bss_conf;
>> -       tx_params = (struct skb_info *)info->driver_data;
>> -       header_size = tx_params->internal_hdr_size;
> Yes, These redundant variables shall be removed.
>>
>>         if (((vif->type == NL80211_IFTYPE_STATION) ||
>>              (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&
>> --
>> 2.7.0
>>
>>
> Also, Patch title also can be changed to "[v2] rsi: remove unused variables"

Ok, will fix in v3.

Thank you.

> 
> Thanks,
> Siva Rebbagondla
> 
> .
> 

^ permalink raw reply

* [PATCH net-next] failover: Add missing check to validate 'slave_dev' in net_failover_slave_unregister
From: YueHaibing @ 2018-09-01  3:06 UTC (permalink / raw)
  To: David S. Miller, Sridhar Samudrala, Stephen Hemminger,
	Dan Carpenter, Alexander Duyck, Jeff Kirsher, Liran Alon,
	Joao Martins
  Cc: YueHaibing, netdev, kernel-janitors
In-Reply-To: <1535687218-23169-1-git-send-email-yuehaibing@huawei.com>

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/net_failover.c: In function 'net_failover_slave_unregister':
drivers/net/net_failover.c:598:35: warning:
 variable 'primary_dev' set but not used [-Wunused-but-set-variable]

There should check the validity of 'slave_dev'.

Fixes: cfc80d9a1163 ("net: Introduce net_failover driver")
Suggested-by: Samudrala, Sridhar <sridhar.samudrala@intel.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/net_failover.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index 7ae1856..af1ece8 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -602,6 +602,9 @@ static int net_failover_slave_unregister(struct net_device *slave_dev,
 	nfo_info = netdev_priv(failover_dev);
 	primary_dev = rtnl_dereference(nfo_info->primary_dev);
 	standby_dev = rtnl_dereference(nfo_info->standby_dev);
+
+	if (slave_dev != primary_dev && slave_dev != standby_dev)
+		return -ENODEV;
 
 	vlan_vids_del_by_dev(slave_dev, failover_dev);
 	dev_uc_unsync(slave_dev, failover_dev);

^ permalink raw reply related

* Re: [PATCH v3] rsi: remove set but not used variables 'header_size' and 'tx_params'
From: YueHaibing @ 2018-09-01  7:21 UTC (permalink / raw)
  To: Kalle Valo, Pavani Muthyala, Kees Cook, Colin Ian King,
	Amitkumar Karwar, Prameela Rani Garnepudi, Siva Rebbagondla,
	Sushant Kumar Mishra
  Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	kernel-janitors-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1535786170-169698-1-git-send-email-yuehaibing-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

On 2018/9/1 15:16, YueHaibing wrote:
> Fixes gcc '-Wunused-but-set-variable' warning:
> 
> drivers/net/wireless/rsi/rsi_91x_hal.c: In function 'rsi_send_data_pkt':
> drivers/net/wireless/rsi/rsi_91x_hal.c:288:5: warning:
>  variable 'header_size' set but not used [-Wunused-but-set-variable]
> 
> 'tx_params' only used for 'header_size' dereferencedï¼Oso also

sorry..., there is some messy code, Also missing changelog, I'll fix.

> can be removed.
> 
> Signed-off-by: YueHaibing <yuehaibing-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
> ---
>  drivers/net/wireless/rsi/rsi_91x_hal.c | 4 ----
>  1 file changed, 4 deletions(-)
> 
> diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
> index 01edf96..182b066 100644
> --- a/drivers/net/wireless/rsi/rsi_91x_hal.c
> +++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
> @@ -282,10 +282,8 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
>  	struct rsi_hw *adapter = common->priv;
>  	struct ieee80211_vif *vif;
>  	struct ieee80211_tx_info *info;
> -	struct skb_info *tx_params;
>  	struct ieee80211_bss_conf *bss;
>  	int status = -EINVAL;
> -	u8 header_size;
>  
>  	if (!skb)
>  		return 0;
> @@ -297,8 +295,6 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
>  		goto err;
>  	vif = info->control.vif;
>  	bss = &vif->bss_conf;
> -	tx_params = (struct skb_info *)info->driver_data;
> -	header_size = tx_params->internal_hdr_size;
>  
>  	if (((vif->type == NL80211_IFTYPE_STATION) ||
>  	     (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&
> 
> 
> .
> 

^ permalink raw reply

* [PATCH v4] rsi: remove set but not used variables 'header_size' and 'tx_params'
From: YueHaibing @ 2018-09-01  7:48 UTC (permalink / raw)
  To: Kalle Valo, Pavani Muthyala, Kees Cook, Colin Ian King,
	Amitkumar Karwar, Prameela Rani Garnepudi, Siva Rebbagondla,
	Sushant Kumar Mishra
  Cc: YueHaibing, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	kernel-janitors-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1535786170-169698-1-git-send-email-yuehaibing-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/wireless/rsi/rsi_91x_hal.c: In function 'rsi_send_data_pkt':
drivers/net/wireless/rsi/rsi_91x_hal.c:288:5: warning:
 variable 'header_size' set but not used [-Wunused-but-set-variable]

'tx_params' only used for 'header_size' dereferenced,so also
can be removed.

Signed-off-by: YueHaibing <yuehaibing-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
v4:add missing changlog,also wipe a messy code in commit log
v3:fix patch title as Siva Rebbagondla suggested
v2:remove unused 'tx_params'
---
 drivers/net/wireless/rsi/rsi_91x_hal.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_hal.c b/drivers/net/wireless/rsi/rsi_91x_hal.c
index 01edf96..182b066 100644
--- a/drivers/net/wireless/rsi/rsi_91x_hal.c
+++ b/drivers/net/wireless/rsi/rsi_91x_hal.c
@@ -282,10 +282,8 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
 	struct rsi_hw *adapter = common->priv;
 	struct ieee80211_vif *vif;
 	struct ieee80211_tx_info *info;
-	struct skb_info *tx_params;
 	struct ieee80211_bss_conf *bss;
 	int status = -EINVAL;
-	u8 header_size;
 
 	if (!skb)
 		return 0;
@@ -297,8 +295,6 @@ int rsi_send_data_pkt(struct rsi_common *common, struct sk_buff *skb)
 		goto err;
 	vif = info->control.vif;
 	bss = &vif->bss_conf;
-	tx_params = (struct skb_info *)info->driver_data;
-	header_size = tx_params->internal_hdr_size;
 
 	if (((vif->type == NL80211_IFTYPE_STATION) ||
 	     (vif->type == NL80211_IFTYPE_P2P_CLIENT)) &&

^ permalink raw reply related

* Again the photos things
From: Jimmy Wilson @ 2018-08-31 11:58 UTC (permalink / raw)
  To: netdev

Hi,

Have you received my email from last week?

I would like to speak with the person who manage your photos for your
company?

We are here to provide you all kinds of imaging editing.

What we can provide you:
Cutting out for photos
Clipping path for photos
Masking for photos
Retouching for your photos
Retouching for the Beauty Model and Portraits.

We have 20 staffs in house and daily basis 1000 images can be processed.

We give testing for your photos.

Thanks,
Jimmy Wilson

^ permalink raw reply

* [PATCH v2] mt76: Enable NL80211_EXT_FEATURE_CQM_RSSI_LIST
From: Kristian Evensen @ 2018-09-01  8:38 UTC (permalink / raw)
  To: linux-wireless, kvalo, netdev, linux-kernel; +Cc: Kristian Evensen

Enable the use of CQM_RSSI_LIST with mt76-devices. The change has been
tested with the mt7602, mt7603 and mt7621 PCI wifi-cards. I passed a
list of RSSI thresholds to the driver, and when disconnecting/connecting
the antenna(s) I got an event each time the RSSI went above/below a
threshold.

While I have not been able to test the change with any of the mt76
USB-devices (no access to a device), the RX RSSI management code is
shared between the two device types. Thus, CQM should also work with the
mt76 USB-devices.

v1->v2:
* Updated commit message. Thanks Kalle Valo, Arend van Spriel,
Lorenzo Bianconi and Andrew Zaborowski.

Signed-off-by: Kristian Evensen <kristian.evensen@gmail.com>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 029d54bce9e8..3eb328ff8c0d 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -305,6 +305,8 @@ int mt76_register_device(struct mt76_dev *dev, bool vht,

 	wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;

+	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
+
 	wiphy->available_antennas_tx = dev->antenna_mask;
 	wiphy->available_antennas_rx = dev->antenna_mask;

-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next] bnxt_en: Properly get address type of encapsulation IP headers
From: YueHaibing @ 2018-09-01  9:25 UTC (permalink / raw)
  To: davem, michael.chan; +Cc: linux-kernel, netdev, YueHaibing

gcc '-Wunused-but-set-variable' warning:

drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c: In function 'bnxt_tc_parse_flow':
drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c:186:6: warning:
 variable 'addr_type' set but not used [-Wunused-but-set-variable]

As done elsewhere in TC/flower offload code, the address type of
the encapsulation IP headers should be realized accroding to the
addr_type field of the encapsulation control dissector key.

Fixes: 8c95f773b4a3 ("bnxt_en: add support for Flower based vxlan encap/decap offload")
Fixes: 2ae7408fedfe ("bnxt_en: bnxt: add TC flower filter offload support")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 092c817..5c625e5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -242,7 +242,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		flow->l2_key.num_vlans = 1;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
+	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
 		struct flow_dissector_key_ipv4_addrs *key =
 			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV4_ADDRS);
 		struct flow_dissector_key_ipv4_addrs *mask =
@@ -253,8 +253,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		flow->l3_mask.ipv4.daddr.s_addr = mask->dst;
 		flow->l3_key.ipv4.saddr.s_addr = key->src;
 		flow->l3_mask.ipv4.saddr.s_addr = mask->src;
-	} else if (dissector_uses_key(dissector,
-				      FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
+	} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 		struct flow_dissector_key_ipv6_addrs *key =
 			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_IPV6_ADDRS);
 		struct flow_dissector_key_ipv6_addrs *mask =
@@ -300,7 +299,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		addr_type = key->addr_type;
 	}
 
-	if (dissector_uses_key(dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+	if (addr_type == FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) {
 		struct flow_dissector_key_ipv4_addrs *key =
 			GET_KEY(tc_flow_cmd, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS);
 		struct flow_dissector_key_ipv4_addrs *mask =
@@ -312,8 +311,7 @@ static int bnxt_tc_parse_flow(struct bnxt *bp,
 		flow->tun_mask.u.ipv4.dst = mask->dst;
 		flow->tun_key.u.ipv4.src = key->src;
 		flow->tun_mask.u.ipv4.src = mask->src;
-	} else if (dissector_uses_key(dissector,
-				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
+	} else if (addr_type == FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) {
 		return -EOPNOTSUPP;
 	}
 
-- 
2.7.0

^ permalink raw reply related

* [PATCH] net: nfc: nci: Fix a sleep-in-atomic-context bug in nci_uart_default_recv_buf()
From: Jia-Ju Bai @ 2018-09-01  9:51 UTC (permalink / raw)
  To: sameo, davem, viro; +Cc: linux-wireless, netdev, linux-kernel, Jia-Ju Bai

The kernel module may sleep with holding a spinlock.

The function call paths (from bottom to top) in Linux-4.16 are:

[FUNC] nci_skb_alloc(GFP_KERNEL)
net/nfc/nci/uart.c, 349: 
	nci_skb_alloc in nci_uart_default_recv_buf
net/nfc/nci/uart.c, 255: 
	[FUNC_PTR]nci_uart_default_recv_buf in nci_uart_tty_receive
net/nfc/nci/uart.c, 254: 
	spin_lock in nci_uart_tty_receive

Note that [FUNC_PTR] means a function pointer call is used.

To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC.

This bug is found by my static analysis tool DSAC.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
 net/nfc/nci/uart.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
index a66f102c6c01..040576dd73bb 100644
--- a/net/nfc/nci/uart.c
+++ b/net/nfc/nci/uart.c
@@ -348,7 +348,7 @@ static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
 			nu->rx_packet_len = -1;
 			nu->rx_skb = nci_skb_alloc(nu->ndev,
 						   NCI_MAX_PACKET_SIZE,
-						   GFP_KERNEL);
+						   GFP_ATOMIC);
 			if (!nu->rx_skb)
 				return -ENOMEM;
 		}
-- 
2.17.0

^ permalink raw reply related

* [PATCH] net: scm: Fix a possible sleep-in-atomic-context bug in scm_fp_copy()
From: Jia-Ju Bai @ 2018-09-01 10:00 UTC (permalink / raw)
  To: davem, ktkhai, viro, adobriyan, dvlasenk, xiyou.wangcong
  Cc: netdev, linux-kernel, Jia-Ju Bai

The kernel module may sleep with holding a spinlock.

The function call paths (from bottom to top) in Linux-4.16 are:

[FUNC] kmalloc(GFP_KERNEL)
net/core/scm.c, 85: kmalloc in scm_fp_copy
net/core/scm.c, 161: scm_fp_copy in __scm_send
./include/net/scm.h, 88: __scm_send in scm_send
net/unix/af_unix.c, 1600: scm_send in maybe_init_creds
net/unix/af_unix.c, 1983: maybe_init_creds in unix_stream_sendpage
net/unix/af_unix.c, 1973: spin_lock in unix_stream_sendpage

To fix this bug, GFP_KERNEL is replaced with GFP_ATOMIC.

This bug is found by my static analysis tool DSAC.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
---
 net/core/scm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index b1ff8a441748..49548f81c45b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -82,7 +82,7 @@ static int scm_fp_copy(struct cmsghdr *cmsg, struct scm_fp_list **fplp)
 
 	if (!fpl)
 	{
-		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_KERNEL);
+		fpl = kmalloc(sizeof(struct scm_fp_list), GFP_ATOMIC);
 		if (!fpl)
 			return -ENOMEM;
 		*fplp = fpl;
-- 
2.17.0

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox