Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH v2 net-next 2/3] net/ipv6: Udate fib6_table_lookup tracepoint
From: dsahern @ 2018-05-24  0:08 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern
In-Reply-To: <20180524000849.6553-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Commit bb0ad1987e96 ("ipv6: fib6_rules: support for match on sport, dport
and ip proto") added support for protocol and ports to FIB rules.
Update the FIB lookup tracepoint to dump the parameters.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/fib6.h | 29 ++++++++++++++++++++++-------
 net/core/net-traces.c       |  4 ----
 net/ipv6/route.c            |  9 +++++++--
 3 files changed, 29 insertions(+), 13 deletions(-)

diff --git a/include/trace/events/fib6.h b/include/trace/events/fib6.h
index 1b8d951e3c12..b088b54d699c 100644
--- a/include/trace/events/fib6.h
+++ b/include/trace/events/fib6.h
@@ -19,7 +19,7 @@ TRACE_EVENT(fib6_table_lookup,
 
 	TP_STRUCT__entry(
 		__field(	u32,	tb_id		)
-
+		__field(	int,	err		)
 		__field(	int,	oif		)
 		__field(	int,	iif		)
 		__field(	__u8,	tos		)
@@ -27,7 +27,10 @@ TRACE_EVENT(fib6_table_lookup,
 		__field(	__u8,	flags		)
 		__array(	__u8,	src,	16	)
 		__array(	__u8,	dst,	16	)
-
+		__field(        u16,	sport		)
+		__field(        u16,	dport		)
+		__field(        u8,	proto		)
+		__field(        u8,	rt_type		)
 		__dynamic_array(	char,	name,	IFNAMSIZ )
 		__array(		__u8,	gw,	16	 )
 	),
@@ -36,6 +39,7 @@ TRACE_EVENT(fib6_table_lookup,
 		struct in6_addr *in6;
 
 		__entry->tb_id = table->tb6_id;
+		__entry->err = ip6_rt_type_to_error(f6i->fib6_type);
 		__entry->oif = flp->flowi6_oif;
 		__entry->iif = flp->flowi6_iif;
 		__entry->tos = ip6_tclass(flp->flowlabel);
@@ -48,10 +52,20 @@ TRACE_EVENT(fib6_table_lookup,
 		in6 = (struct in6_addr *)__entry->dst;
 		*in6 = flp->daddr;
 
+		__entry->proto = flp->flowi6_proto;
+		if (__entry->proto == IPPROTO_TCP ||
+		    __entry->proto == IPPROTO_UDP) {
+			__entry->sport = ntohs(flp->fl6_sport);
+			__entry->dport = ntohs(flp->fl6_dport);
+		} else {
+			__entry->sport = 0;
+			__entry->dport = 0;
+		}
+
 		if (f6i->fib6_nh.nh_dev) {
 			__assign_str(name, f6i->fib6_nh.nh_dev);
 		} else {
-			__assign_str(name, "");
+			__assign_str(name, "-");
 		}
 		if (f6i == net->ipv6.fib6_null_entry) {
 			struct in6_addr in6_zero = {};
@@ -65,10 +79,11 @@ TRACE_EVENT(fib6_table_lookup,
 		}
 	),
 
-	TP_printk("table %3u oif %d iif %d src %pI6c dst %pI6c tos %d scope %d flags %x ==> dev %s gw %pI6c",
-		  __entry->tb_id, __entry->oif, __entry->iif,
-		  __entry->src, __entry->dst, __entry->tos, __entry->scope,
-		  __entry->flags, __get_str(name), __entry->gw)
+	TP_printk("table %3u oif %d iif %d proto %u %pI6c/%u -> %pI6c/%u tos %d scope %d flags %x ==> dev %s gw %pI6c err %d",
+		  __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+		  __entry->src, __entry->sport, __entry->dst, __entry->dport,
+		  __entry->tos, __entry->scope, __entry->flags,
+		  __get_str(name), __entry->gw, __entry->err)
 );
 
 #endif /* _TRACE_FIB6_H */
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index 380934580fa1..419af6dfe29f 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -35,10 +35,6 @@
 #include <trace/events/tcp.h>
 #include <trace/events/fib.h>
 #include <trace/events/qdisc.h>
-#if IS_ENABLED(CONFIG_IPV6)
-#include <trace/events/fib6.h>
-EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
-#endif
 #if IS_ENABLED(CONFIG_BRIDGE)
 #include <trace/events/bridge.h>
 EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index bcb8785c0451..8d9e460ff88b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -63,14 +63,19 @@
 #include <net/lwtunnel.h>
 #include <net/ip_tunnels.h>
 #include <net/l3mdev.h>
-#include <trace/events/fib6.h>
-
 #include <linux/uaccess.h>
 
 #ifdef CONFIG_SYSCTL
 #include <linux/sysctl.h>
 #endif
 
+static int ip6_rt_type_to_error(u8 fib6_type);
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/fib6.h>
+EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup);
+#undef CREATE_TRACE_POINTS
+
 enum rt6_nud_state {
 	RT6_NUD_FAIL_HARD = -3,
 	RT6_NUD_FAIL_PROBE = -2,
-- 
2.11.0

^ permalink raw reply related

* [PATCH v2 net-next 0/3] net: Update fib_table_lookup tracepoints
From: dsahern @ 2018-05-24  0:08 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern

From: David Ahern <dsahern@gmail.com>

Update the FIB lookup tracepoints to include ip proto and port fields
from the flow struct. In the process make the IPv4 tracepoint inline
with IPv6 which is much easier to use and follow the lookup and result.

Remove the tracepoint in fib_validate_source which does not provide
value above the fib_table_lookup which immediately follows it.

v2
- move CREATE_TRACE_POINTS for the v6 tracepoint to route.c to handle
  its need for an internal function to convert route type to error and
  handle IPv6 as a module or builtin. Reported by kbuild robot.

David Ahern (3):
  net/ipv4: Udate fib_table_lookup tracepoint
  net/ipv6: Udate fib6_table_lookup tracepoint
  net/ipv4: Remove tracepoint in fib_validate_source

 include/trace/events/fib.h  | 107 ++++++++++++++++++--------------------------
 include/trace/events/fib6.h |  29 +++++++++---
 net/core/net-traces.c       |   4 --
 net/ipv4/fib_frontend.c     |   2 -
 net/ipv4/fib_trie.c         |  14 +++---
 net/ipv6/route.c            |   9 +++-
 6 files changed, 81 insertions(+), 84 deletions(-)

-- 
2.11.0

^ permalink raw reply

* [PATCH v2 net-next 3/3] net/ipv4: Remove tracepoint in fib_validate_source
From: dsahern @ 2018-05-24  0:08 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern
In-Reply-To: <20180524000849.6553-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Tracepoint does not add value and the call to fib_lookup follows
it which shows the same information and the fib lookup result.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/fib.h | 35 -----------------------------------
 net/ipv4/fib_frontend.c    |  2 --
 2 files changed, 37 deletions(-)

diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index f5a1d4c518d8..9763cddd0594 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -87,41 +87,6 @@ TRACE_EVENT(fib_table_lookup,
 		  __entry->tos, __entry->scope, __entry->flags,
 		  __get_str(name), __entry->gw, __entry->saddr, __entry->err)
 );
-
-TRACE_EVENT(fib_validate_source,
-
-	TP_PROTO(const struct net_device *dev, const struct flowi4 *flp),
-
-	TP_ARGS(dev, flp),
-
-	TP_STRUCT__entry(
-		__string(	name,	dev->name	)
-		__field(	int,	oif		)
-		__field(	int,	iif		)
-		__field(	__u8,	tos		)
-		__array(	__u8,	src,	4	)
-		__array(	__u8,	dst,	4	)
-	),
-
-	TP_fast_assign(
-		__be32 *p32;
-
-		__assign_str(name, dev ? dev->name : "not set");
-		__entry->oif = flp->flowi4_oif;
-		__entry->iif = flp->flowi4_iif;
-		__entry->tos = flp->flowi4_tos;
-
-		p32 = (__be32 *) __entry->src;
-		*p32 = flp->saddr;
-
-		p32 = (__be32 *) __entry->dst;
-		*p32 = flp->daddr;
-	),
-
-	TP_printk("dev %s oif %d iif %d tos %d src %pI4 dst %pI4",
-		  __get_str(name), __entry->oif, __entry->iif, __entry->tos,
-		  __entry->src, __entry->dst)
-);
 #endif /* _TRACE_FIB_H */
 
 /* This part must be outside protection */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 4d622112bf95..58696b829065 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -354,8 +354,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 		fl4.fl4_dport = 0;
 	}
 
-	trace_fib_validate_source(dev, &fl4);
-
 	if (fib_lookup(net, &fl4, &res, 0))
 		goto last_resort;
 	if (res.type != RTN_UNICAST &&
-- 
2.11.0

^ permalink raw reply related

* [PATCH v2 net-next 1/3] net/ipv4: Udate fib_table_lookup tracepoint
From: dsahern @ 2018-05-24  0:08 UTC (permalink / raw)
  To: netdev; +Cc: David Ahern
In-Reply-To: <20180524000849.6553-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Commit 4a2d73a4fb36 ("ipv4: fib_rules: support match on sport, dport
and ip proto") added support for protocol and ports to FIB rules.
Update the FIB lookup tracepoint to dump the parameters.

In addition, make the IPv4 tracepoint similar to the IPv6 one where
the lookup parameters and result are dumped in 1 event. It is much
easier to use and understand the outcome of the lookup.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/trace/events/fib.h | 72 +++++++++++++++++++++++++++-------------------
 net/ipv4/fib_trie.c        | 14 +++++----
 2 files changed, 52 insertions(+), 34 deletions(-)

diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h
index 81b7e985bb45..f5a1d4c518d8 100644
--- a/include/trace/events/fib.h
+++ b/include/trace/events/fib.h
@@ -12,12 +12,14 @@
 
 TRACE_EVENT(fib_table_lookup,
 
-	TP_PROTO(u32 tb_id, const struct flowi4 *flp),
+	TP_PROTO(u32 tb_id, const struct flowi4 *flp,
+		 const struct fib_nh *nh, int err),
 
-	TP_ARGS(tb_id, flp),
+	TP_ARGS(tb_id, flp, nh, err),
 
 	TP_STRUCT__entry(
 		__field(	u32,	tb_id		)
+		__field(	int,	err		)
 		__field(	int,	oif		)
 		__field(	int,	iif		)
 		__field(	__u8,	tos		)
@@ -25,12 +27,19 @@ TRACE_EVENT(fib_table_lookup,
 		__field(	__u8,	flags		)
 		__array(	__u8,	src,	4	)
 		__array(	__u8,	dst,	4	)
+		__array(	__u8,	gw,	4	)
+		__array(	__u8,	saddr,	4	)
+		__field(	u16,	sport		)
+		__field(	u16,	dport		)
+		__field(	u8,	proto		)
+		__dynamic_array(char,  name,   IFNAMSIZ )
 	),
 
 	TP_fast_assign(
 		__be32 *p32;
 
 		__entry->tb_id = tb_id;
+		__entry->err = err;
 		__entry->oif = flp->flowi4_oif;
 		__entry->iif = flp->flowi4_iif;
 		__entry->tos = flp->flowi4_tos;
@@ -42,36 +51,41 @@ TRACE_EVENT(fib_table_lookup,
 
 		p32 = (__be32 *) __entry->dst;
 		*p32 = flp->daddr;
-	),
-
-	TP_printk("table %u oif %d iif %d src %pI4 dst %pI4 tos %d scope %d flags %x",
-		  __entry->tb_id, __entry->oif, __entry->iif,
-		  __entry->src, __entry->dst, __entry->tos, __entry->scope,
-		  __entry->flags)
-);
-
-TRACE_EVENT(fib_table_lookup_nh,
-
-	TP_PROTO(const struct fib_nh *nh),
-
-	TP_ARGS(nh),
-
-	TP_STRUCT__entry(
-		__string(	name,	nh->nh_dev->name)
-		__field(	int,	oif		)
-		__array(	__u8,	src,	4	)
-	),
-
-	TP_fast_assign(
-		__be32 *p32 = (__be32 *) __entry->src;
 
-		__assign_str(name, nh->nh_dev ? nh->nh_dev->name : "not set");
-		__entry->oif = nh->nh_oif;
-		*p32 = nh->nh_saddr;
+		__entry->proto = flp->flowi4_proto;
+		if (__entry->proto == IPPROTO_TCP ||
+		    __entry->proto == IPPROTO_UDP) {
+			__entry->sport = ntohs(flp->fl4_sport);
+			__entry->dport = ntohs(flp->fl4_dport);
+		} else {
+			__entry->sport = 0;
+			__entry->dport = 0;
+		}
+
+		if (nh) {
+			p32 = (__be32 *) __entry->saddr;
+			*p32 = nh->nh_saddr;
+
+			p32 = (__be32 *) __entry->gw;
+			*p32 = nh->nh_gw;
+
+			__assign_str(name, nh->nh_dev ? nh->nh_dev->name : "-");
+		} else {
+			p32 = (__be32 *) __entry->saddr;
+			*p32 = 0;
+
+			p32 = (__be32 *) __entry->gw;
+			*p32 = 0;
+
+			__assign_str(name, "-");
+		}
 	),
 
-	TP_printk("nexthop dev %s oif %d src %pI4",
-		  __get_str(name), __entry->oif, __entry->src)
+	TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d",
+		  __entry->tb_id, __entry->oif, __entry->iif, __entry->proto,
+		  __entry->src, __entry->sport, __entry->dst, __entry->dport,
+		  __entry->tos, __entry->scope, __entry->flags,
+		  __get_str(name), __entry->gw, __entry->saddr, __entry->err)
 );
 
 TRACE_EVENT(fib_validate_source,
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 3dcffd3ce98c..65c340f230ae 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1326,14 +1326,14 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 	unsigned long index;
 	t_key cindex;
 
-	trace_fib_table_lookup(tb->tb_id, flp);
-
 	pn = t->kv;
 	cindex = 0;
 
 	n = get_child_rcu(pn, cindex);
-	if (!n)
+	if (!n) {
+		trace_fib_table_lookup(tb->tb_id, flp, NULL, -EAGAIN);
 		return -EAGAIN;
+	}
 
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 	this_cpu_inc(stats->gets);
@@ -1416,8 +1416,11 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 				 * nothing for us to do as we do not have any
 				 * further nodes to parse.
 				 */
-				if (IS_TRIE(pn))
+				if (IS_TRIE(pn)) {
+					trace_fib_table_lookup(tb->tb_id, flp,
+							       NULL, -EAGAIN);
 					return -EAGAIN;
+				}
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 				this_cpu_inc(stats->backtrack);
 #endif
@@ -1459,6 +1462,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
+			trace_fib_table_lookup(tb->tb_id, flp, NULL, err);
 			return err;
 		}
 		if (fi->fib_flags & RTNH_F_DEAD)
@@ -1494,7 +1498,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
 #ifdef CONFIG_IP_FIB_TRIE_STATS
 			this_cpu_inc(stats->semantic_match_passed);
 #endif
-			trace_fib_table_lookup_nh(nh);
+			trace_fib_table_lookup(tb->tb_id, flp, nh, err);
 
 			return err;
 		}
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH RFC net-next 00/11] udp gso
From: Marcelo Ricardo Leitner @ 2018-05-24  0:02 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: Paolo Abeni, Network Development, Willem de Bruijn
In-Reply-To: <CAF=yD-LaDvQdkE_BkZX7o1ukjyodWiwK=nJ5S=bTgJ-91KBhHg@mail.gmail.com>

On Wed, Apr 18, 2018 at 09:49:18AM -0400, Willem de Bruijn wrote:
> I just hacked up a sendmmsg extension to the benchmark to verify.
> Indeed that does not have nearly the same benefit as GSO:
> 
> udp tx:    976 MB/s   695394 calls/s  16557 msg/s
> 
> This matches the numbers seen from TCP without TSO and GSO.
> That also has few system calls, but observes per MTU stack traversal.

Reviving this old thread because it's the only place I saw sendmmsg
being mentioned.

sendmmsg shouldn't be considered as an alternative, but rather as a
complement. Then instead of the application building one large request
and request the stack to fragment it, it could simply build the
sendmmsg request and the stack would group the mmsg into a gso skb. It
seems more natural to the application. But well, both (sendmmsg and
the option to fragment) are Linux-specific..

For that we need sendmmsg to do something smarter than doing several
sendmsg calls, yes.

^ permalink raw reply

* Re: [PATCH net-next v15 4/7] sch_cake: Add NAT awareness to packet classifier
From: Toke Høiland-Jørgensen @ 2018-05-23 23:25 UTC (permalink / raw)
  To: Pablo Neira Ayuso; +Cc: netdev, cake, netfilter-devel
In-Reply-To: <20180523224653.mvxkibc4x37nbhha@salvia>

Pablo Neira Ayuso <pablo@netfilter.org> writes:

> On Tue, May 22, 2018 at 04:11:06PM +0200, Toke Høiland-Jørgensen wrote:
>> Pablo Neira Ayuso <pablo@netfilter.org> writes:
>> 
>> > Hi Toke,
>> >
>> > On Tue, May 22, 2018 at 03:57:38PM +0200, Toke Høiland-Jørgensen wrote:
>> >> When CAKE is deployed on a gateway that also performs NAT (which is a
>> >> common deployment mode), the host fairness mechanism cannot distinguish
>> >> internal hosts from each other, and so fails to work correctly.
>> >> 
>> >> To fix this, we add an optional NAT awareness mode, which will query the
>> >> kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
>> >> and use that in the flow and host hashing.
>> >> 
>> >> When the shaper is enabled and the host is already performing NAT, the cost
>> >> of this lookup is negligible. However, in unlimited mode with no NAT being
>> >> performed, there is a significant CPU cost at higher bandwidths. For this
>> >> reason, the feature is turned off by default.
>> >> 
>> >> Cc: netfilter-devel@vger.kernel.org
>> >> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
>> >> ---
>> >>  net/sched/sch_cake.c |   79 ++++++++++++++++++++++++++++++++++++++++++++++++++
>> >>  1 file changed, 79 insertions(+)
>> >> 
>> >> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
>> >> index 68ac908470f1..6f7cae705c84 100644
>> >> --- a/net/sched/sch_cake.c
>> >> +++ b/net/sched/sch_cake.c
>> >> @@ -71,6 +71,12 @@
>> >>  #include <net/tcp.h>
>> >>  #include <net/flow_dissector.h>
>> >>  
>> >> +#if IS_REACHABLE(CONFIG_NF_CONNTRACK)
>> >> +#include <net/netfilter/nf_conntrack_core.h>
>> >> +#include <net/netfilter/nf_conntrack_zones.h>
>> >> +#include <net/netfilter/nf_conntrack.h>
>> >> +#endif
>> >> +
>> >>  #define CAKE_SET_WAYS (8)
>> >>  #define CAKE_MAX_TINS (8)
>> >>  #define CAKE_QUEUES (1024)
>> >> @@ -516,6 +522,60 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
>> >>  	return drop;
>> >>  }
>> >>  
>> >> +#if IS_REACHABLE(CONFIG_NF_CONNTRACK)
>> >> +
>> >> +static void cake_update_flowkeys(struct flow_keys *keys,
>> >> +				 const struct sk_buff *skb)
>> >> +{
>> >> +	const struct nf_conntrack_tuple *tuple;
>> >> +	enum ip_conntrack_info ctinfo;
>> >> +	struct nf_conn *ct;
>> >> +	bool rev = false;
>> >> +
>> >> +	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
>> >> +		return;
>> >> +
>> >> +	ct = nf_ct_get(skb, &ctinfo);
>> >> +	if (ct) {
>> >> +		tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
>> >> +	} else {
>> >> +		const struct nf_conntrack_tuple_hash *hash;
>> >> +		struct nf_conntrack_tuple srctuple;
>> >> +
>> >> +		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
>> >> +				       NFPROTO_IPV4, dev_net(skb->dev),
>> >> +				       &srctuple))
>> >> +			return;
>> >> +
>> >> +		hash = nf_conntrack_find_get(dev_net(skb->dev),
>> >> +					     &nf_ct_zone_dflt,
>> >> +					     &srctuple);
>> >> +		if (!hash)
>> >> +			return;
>> >> +
>> >> +		rev = true;
>> >> +		ct = nf_ct_tuplehash_to_ctrack(hash);
>> >> +		tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
>> >> +	}
>> >> +
>> >> +	keys->addrs.v4addrs.src = rev ? tuple->dst.u3.ip : tuple->src.u3.ip;
>> >> +	keys->addrs.v4addrs.dst = rev ? tuple->src.u3.ip : tuple->dst.u3.ip;
>> >> +
>> >> +	if (keys->ports.ports) {
>> >> +		keys->ports.src = rev ? tuple->dst.u.all : tuple->src.u.all;
>> >> +		keys->ports.dst = rev ? tuple->src.u.all : tuple->dst.u.all;
>> >> +	}
>> >> +	if (rev)
>> >> +		nf_ct_put(ct);
>> >> +}
>> >
>> > This is going to pull in the nf_conntrack module, even if you may not
>> > want it, as soon as cake is in place.
>> 
>> Yeah, we are aware of that; we get a moddep on nf_conntrack. Our main
>> deployment scenario has been home routers where conntrack is used
>> anyway, so this has not been much of an issue. However, if there is a
>> way to avoid this, and instead detect at runtime if conntrack is
>> available, that would certainly be useful. Is there? :)
>
> Yes, there is.
>
> You place this function in net/netfilter/nf_conntrack_core.c, call it
> nf_conntrack_get_tuple() which internally uses a rcu hook for this.
> See nf_ct_attach() and ip_ct_attach() in net/netfilter/core.c for
> instance.
>
> This allows you to avoid the dependency with nf_conntrack (which would
> be only called if the module has been explicitly loaded), which is
> what you're searching for.

Ah, awesome! I'll look into that; thanks :)

-Toke

^ permalink raw reply

* [PATCH V4] mlx4_core: allocate ICM memory in page size chunks
From: Qing Huang @ 2018-05-23 23:22 UTC (permalink / raw)
  To: tariqt, davem, haakon.bugge, yanjun.zhu
  Cc: netdev, linux-rdma, linux-kernel, gi-oh.kim, Qing Huang

When a system is under memory presure (high usage with fragments),
the original 256KB ICM chunk allocations will likely trigger kernel
memory management to enter slow path doing memory compact/migration
ops in order to complete high order memory allocations.

When that happens, user processes calling uverb APIs may get stuck
for more than 120s easily even though there are a lot of free pages
in smaller chunks available in the system.

Syslog:
...
Dec 10 09:04:51 slcc03db02 kernel: [397078.572732] INFO: task
oracle_205573_e:205573 blocked for more than 120 seconds.
...

With 4KB ICM chunk size on x86_64 arch, the above issue is fixed.

However in order to support smaller ICM chunk size, we need to fix
another issue in large size kcalloc allocations.

E.g.
Setting log_num_mtt=30 requires 1G mtt entries. With the 4KB ICM chunk
size, each ICM chunk can only hold 512 mtt entries (8 bytes for each mtt
entry). So we need a 16MB allocation for a table->icm pointer array to
hold 2M pointers which can easily cause kcalloc to fail.

The solution is to use kvzalloc to replace kcalloc which will fall back
to vmalloc automatically if kmalloc fails.

Signed-off-by: Qing Huang <qing.huang@oracle.com>
Acked-by: Daniel Jurgens <danielj@mellanox.com>
Reviewed-by: Zhu Yanjun <yanjun.zhu@oracle.com>
---
v4: use kvzalloc instead of vzalloc
    add one err condition check
    don't include vmalloc.h any more

v3: use PAGE_SIZE instead of PAGE_SHIFT
    add comma to the end of enum variables
    include vmalloc.h header file to avoid build issues on Sparc

v2: adjusted chunk size to reflect different architectures

 drivers/net/ethernet/mellanox/mlx4/icm.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index a822f7a..685337d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -43,12 +43,12 @@
 #include "fw.h"
 
 /*
- * We allocate in as big chunks as we can, up to a maximum of 256 KB
- * per chunk.
+ * We allocate in page size (default 4KB on many archs) chunks to avoid high
+ * order memory allocations in fragmented/high usage memory situation.
  */
 enum {
-	MLX4_ICM_ALLOC_SIZE	= 1 << 18,
-	MLX4_TABLE_CHUNK_SIZE	= 1 << 18
+	MLX4_ICM_ALLOC_SIZE	= PAGE_SIZE,
+	MLX4_TABLE_CHUNK_SIZE	= PAGE_SIZE,
 };
 
 static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chunk)
@@ -398,9 +398,11 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 	u64 size;
 
 	obj_per_chunk = MLX4_TABLE_CHUNK_SIZE / obj_size;
+	if (WARN_ON(!obj_per_chunk))
+		return -EINVAL;
 	num_icm = (nobj + obj_per_chunk - 1) / obj_per_chunk;
 
-	table->icm      = kcalloc(num_icm, sizeof(*table->icm), GFP_KERNEL);
+	table->icm      = kvzalloc(num_icm * sizeof(*table->icm), GFP_KERNEL);
 	if (!table->icm)
 		return -ENOMEM;
 	table->virt     = virt;
@@ -446,7 +448,7 @@ int mlx4_init_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table,
 			mlx4_free_icm(dev, table->icm[i], use_coherent);
 		}
 
-	kfree(table->icm);
+	kvfree(table->icm);
 
 	return -ENOMEM;
 }
@@ -462,5 +464,5 @@ void mlx4_cleanup_icm_table(struct mlx4_dev *dev, struct mlx4_icm_table *table)
 			mlx4_free_icm(dev, table->icm[i], table->coherent);
 		}
 
-	kfree(table->icm);
+	kvfree(table->icm);
 }
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCH net-next v3] net: sched: don't disable bh when accessing action idr
From: Cong Wang @ 2018-05-23 23:14 UTC (permalink / raw)
  To: Vlad Buslov
  Cc: Jiri Pirko, Linux Kernel Network Developers, Jamal Hadi Salim,
	David Miller, LKML
In-Reply-To: <1527065574-11299-1-git-send-email-vladbu@mellanox.com>

On Wed, May 23, 2018 at 1:52 AM, Vlad Buslov <vladbu@mellanox.com> wrote:
> Initial net_device implementation used ingress_lock spinlock to synchronize
> ingress path of device. This lock was used in both process and bh context.
> In some code paths action map lock was obtained while holding ingress_lock.
> Commit e1e992e52faa ("[NET_SCHED] protect action config/dump from irqs")
> modified actions to always disable bh, while using action map lock, in
> order to prevent deadlock on ingress_lock in softirq. This lock was removed
> in commit 555353cfa1ae ("netdev: The ingress_lock member is no longer
> needed.").
>
> Another reason to disable bh was filters delete code, that released actions
> in rcu callback. This code was changed to release actions from workqueue
> context in patch set "net_sched: close the race between call_rcu() and
> cleanup_net()".
>
> With these changes it is no longer necessary to continue disable bh while
> accessing action map.
>
> Replace all action idr spinlock usage with regular calls that do not
> disable bh.

Looks much better now!

I _guess_ we perhaps can even get rid of this spinlock since most of
the callers hold RTNL lock, not sure about the dump() path where
RTNL might be removed recently.

Anyway,

Acked-by: Cong Wang <xiyou.wangcong@gmail.com>

^ permalink raw reply

* Re: INFO: rcu detected stall in corrupted
From: Marcelo Ricardo Leitner @ 2018-05-23 23:13 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: David Miller, syzbot+f116bc1994efe725d51b, kuznet, linux-kernel,
	netdev, syzkaller-bugs, yoshfuji, dsahern, roopa, linux-sctp,
	Xin Long
In-Reply-To: <f8d0d282-1e75-d86a-8872-e32b57a6ec14@gmail.com>

On Mon, May 21, 2018 at 11:13:46AM -0700, Eric Dumazet wrote:
> 
> 
> On 05/21/2018 11:09 AM, David Miller wrote:
> > From: syzbot <syzbot+f116bc1994efe725d51b@syzkaller.appspotmail.com>
> > Date: Mon, 21 May 2018 11:05:02 -0700
> > 
> >>  find_match+0x244/0x13a0 net/ipv6/route.c:691
> >>  find_rr_leaf net/ipv6/route.c:729 [inline]
> >>  rt6_select net/ipv6/route.c:779 [inline]
> > 
> > Hmmm, endless loop in find_rr_leaf or similar?
> > 
> 
> 
> I do not think so, this really looks like SCTP specific 
> , we now have dozens of traces all sharing :
> 
>  sctp_transport_route+0xad/0x450 net/sctp/transport.c:293
>  sctp_packet_config+0xb89/0xfd0 net/sctp/output.c:123
>  sctp_outq_flush+0x79c/0x4370 net/sctp/outqueue.c:894
>  sctp_outq_uncork+0x6a/0x80 net/sctp/outqueue.c:776
>  sctp_cmd_interpreter net/sctp/sm_sideeffect.c:1820 [inline]
>  sctp_side_effects net/sctp/sm_sideeffect.c:1220 [inline]
>  sctp_do_sm+0x596/0x7160 net/sctp/sm_sideeffect.c:1191
>  sctp_generate_heartbeat_event+0x218/0x450 net/sctp/sm_sideeffect.c:406
>  call_timer_fn+0x230/0x940 kernel/time/timer.c:1326 
> 
> 
> Some kind of infinite loop.
> 
> When the hrtimer fires, it can point to any code that sits below but does not necessarily have a bug.

Agreed. Xin Long identified the root cause. syzkaller is setting too
aggressive parameters to SCTP RTO, leading to issues with the
heartbeat timer.

^ permalink raw reply

* Re: [PATCH v2] ppp: remove the PPPIOCDETACH ioctl
From: Paul Mackerras @ 2018-05-23 23:04 UTC (permalink / raw)
  To: Eric Biggers
  Cc: linux-ppp, netdev, linux-fsdevel, linux-kernel, Guillaume Nault,
	syzkaller-bugs, Eric Biggers
In-Reply-To: <20180523213738.146911-1-ebiggers3@gmail.com>

On Wed, May 23, 2018 at 02:37:38PM -0700, Eric Biggers wrote:
> From: Eric Biggers <ebiggers@google.com>
> 
> The PPPIOCDETACH ioctl effectively tries to "close" the given ppp file
> before f_count has reached 0, which is fundamentally a bad idea.  It
> does check 'f_count < 2', which excludes concurrent operations on the
> file since they would only be possible with a shared fd table, in which
> case each fdget() would take a file reference.  However, it fails to
> account for the fact that even with 'f_count == 1' the file can still be
> linked into epoll instances.  As reported by syzbot, this can trivially
> be used to cause a use-after-free.
> 
> Yet, the only known user of PPPIOCDETACH is pppd versions older than
> ppp-2.4.2, which was released almost 15 years ago (November 2003).
> Also, PPPIOCDETACH apparently stopped working reliably at around the
> same time, when the f_count check was added to the kernel, e.g. see
> https://lkml.org/lkml/2002/12/31/83.  Also, the current 'f_count < 2'
> check makes PPPIOCDETACH only work in single-threaded applications; it
> always fails if called from a multithreaded application.
> 
> All pppd versions released in the last 15 years just close() the file
> descriptor instead.
> 
> Therefore, instead of hacking around this bug by exporting epoll
> internals to modules, and probably missing other related bugs, just
> remove the PPPIOCDETACH ioctl and see if anyone actually notices.  Leave
> a stub in place that prints a one-time warning and returns EINVAL.
> 
> Reported-by: syzbot+16363c99d4134717c05b@syzkaller.appspotmail.com
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Cc: stable@vger.kernel.org
> Signed-off-by: Eric Biggers <ebiggers@google.com>

Acked-by: Paul Mackerras <paulus@ozlabs.org>

^ permalink raw reply

* Re: [PATCH net-next] net:sched: add action inheritdsfield to skbmod
From: Cong Wang @ 2018-05-23 23:01 UTC (permalink / raw)
  To: Fu, Qiaobin
  Cc: davem@davemloft.net, netdev@vger.kernel.org, jhs@mojatatu.com,
	Michel Machado
In-Reply-To: <2F042100-2BAC-48E5-887C-5D426B1D5A5B@bu.edu>

On Thu, May 17, 2018 at 12:33 PM, Fu, Qiaobin <qiaobinf@bu.edu> wrote:
> net/sched: add action inheritdsfield to skbmod
>
> The new action inheritdsfield copies the field DS of
> IPv4 and IPv6 packets into skb->prioriry. This enables
> later classification of packets based on the DS field.
>
> Original idea by Jamal Hadi Salim <jhs@mojatatu.com>
>
> Signed-off-by: Qiaobin Fu <qiaobinf@bu.edu>
> Reviewed-by: Michel Machado <michel@digirati.com.br>

Hmm, but skbedit seems better than skbmod for this job,
given:

1) It already modifies skb->priority, although with a given value

2) skbmod doesn't change skb metadata, it only changes payload

I am _not_ saying there is strict rule for what skbmod can or can't
change, it calls itself "data modifier", so I am saying we probably
need to follow this existing practice.

^ permalink raw reply

* [PATCH RFC net-next 9/9] selftests: fib_tests: Add prefix route tests with metric
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Add tests verifying prefix routes are inserted with expected metric.

IPv6 prefix route tests
    TEST: Default metric                                      [ OK ]
    TEST: User specified metric on first device               [ OK ]
    TEST: User specified metric on second device              [ OK ]
    TEST: Delete of address on first device                   [ OK ]
    TEST: Modify metric of address                            [ OK ]
    TEST: Prefix route removed on link down                   [ OK ]
    TEST: Prefix route with metric on link up                 [ OK ]

IPv4 prefix route tests
    TEST: Default metric                                      [ OK ]
    TEST: User specified metric on first device               [ OK ]
    TEST: User specified metric on second device              [ OK ]
    TEST: Delete of address on first device                   [ OK ]
    TEST: Modify metric of address                            [ OK ]
    TEST: Prefix route removed on link down                   [ OK ]
    TEST: Prefix route with metric on link up                 [ OK ]

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 tools/testing/selftests/net/fib_tests.sh | 181 ++++++++++++++++++++++++++++++-
 1 file changed, 180 insertions(+), 1 deletion(-)
 mode change 100755 => 100644 tools/testing/selftests/net/fib_tests.sh

diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
old mode 100755
new mode 100644
index e7d76fbc36e9..9780c5a2d73f
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -6,7 +6,8 @@
 
 ret=0
 
-TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt"
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
@@ -642,6 +643,8 @@ check_route6()
 	local rc=0
 
 	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+	[ "${out}" = "${expected}" ] && return 0
+
 	if [ -z "${out}" ]; then
 		if [ "$VERBOSE" = "1" ]; then
 			printf "\nNo route entry found\n"
@@ -911,6 +914,98 @@ ipv6_route_test()
 	route_cleanup
 }
 
+ip_addr_metric_check()
+{
+	ip addr help 2>&1 | grep -q metric
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support metric for addresses. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv6_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy1"
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
 # add route for a prefix, flushing any existing routes first
 # expected to be the first step of a test
 add_route()
@@ -955,6 +1050,8 @@ check_route()
 	local rc=0
 
 	out=$($IP ro ls match ${pfx})
+	[ "${out}" = "${expected}" ] && return 0
+
 	if [ -z "${out}" ]; then
 		if [ "$VERBOSE" = "1" ]; then
 			printf "\nNo route entry found\n"
@@ -1181,6 +1278,86 @@ ipv4_route_test()
 	route_cleanup
 }
 
+ipv4_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy1"
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
 ################################################################################
 # usage
 
@@ -1245,6 +1422,8 @@ do
 	fib_nexthop_test|nexthop)	fib_nexthop_test;;
 	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
 	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
+	ipv6_addr_metric)		ipv6_addr_metric_test;;
+	ipv4_addr_metric)		ipv4_addr_metric_test;;
 
 	help) echo "Test names: $TESTS"; exit 0;;
 	esac
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 8/9] net/ipv6: Add support for specifying metric of connected routes
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Add support for IFA_RT_PRIORITY to ipv6 addresses.

If the metric is changed on an existing address then the new route
is inserted before removing the old one. Since the metric is one
of the route keys, the prefix route can not be atomically replaced.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/addrconf.h |  1 +
 include/net/if_inet6.h |  1 +
 net/ipv6/addrconf.c    | 94 ++++++++++++++++++++++++++++++++++++++++----------
 3 files changed, 78 insertions(+), 18 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 1e3ef04176d6..aec0a32fb531 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -68,6 +68,7 @@ struct ifa6_config {
 	u32 ifa_flags;
 	u32 preferred_lft;
 	u32 valid_lft;
+	u32 rt_priority;
 	u16 scope;
 };
 
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index db389253dc2a..d7578cf49c3a 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -42,6 +42,7 @@ enum {
 struct inet6_ifaddr {
 	struct in6_addr		addr;
 	__u32			prefix_len;
+	__u32			rt_priority;
 
 	/* In seconds, relative to tstamp. Expiry is at tstamp + HZ * lft. */
 	__u32			valid_lft;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1b1e63a4520b..7d726900c8c2 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1056,6 +1056,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
 	INIT_HLIST_NODE(&ifa->addr_lst);
 	ifa->scope = cfg->scope;
 	ifa->prefix_len = cfg->plen;
+	ifa->rt_priority = cfg->rt_priority;
 	ifa->flags = cfg->ifa_flags;
 	/* No need to add the TENTATIVE flag for addresses with NODAD */
 	if (!(cfg->ifa_flags & IFA_F_NODAD))
@@ -1323,6 +1324,8 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 		}
 	}
 
+	memset(&cfg, 0, sizeof(cfg));
+
 	cfg.valid_lft = min_t(__u32, ifp->valid_lft,
 			      idev->cnf.temp_valid_lft + age);
 	cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
@@ -2314,12 +2317,13 @@ static void  ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpad
  */
 
 static void
-addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
-		      unsigned long expires, u32 flags, gfp_t gfp_flags)
+addrconf_prefix_route(struct in6_addr *pfx, int plen, u32 metric,
+		      struct net_device *dev, unsigned long expires,
+		      u32 flags, gfp_t gfp_flags)
 {
 	struct fib6_config cfg = {
 		.fc_table = l3mdev_fib_table(dev) ? : RT6_TABLE_PREFIX,
-		.fc_metric = IP6_RT_PRIO_ADDRCONF,
+		.fc_metric = metric ? : IP6_RT_PRIO_ADDRCONF,
 		.fc_ifindex = dev->ifindex,
 		.fc_expires = expires,
 		.fc_dst_len = plen,
@@ -2683,7 +2687,8 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				expires = jiffies_to_clock_t(rt_expires);
 			}
 			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
-					      dev, expires, flags, GFP_ATOMIC);
+					      0, dev, expires, flags,
+					      GFP_ATOMIC);
 		}
 		fib6_info_release(rt);
 	}
@@ -2891,8 +2896,9 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	ifp = ipv6_add_addr(idev, cfg, true, extack);
 	if (!IS_ERR(ifp)) {
 		if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
-			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
-					      expires, flags, GFP_KERNEL);
+			addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+					      ifp->rt_priority, dev, expires,
+					      flags, GFP_KERNEL);
 		}
 
 		/* Send a netlink notification if DAD is enabled and
@@ -3056,7 +3062,7 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 
 	if (addr.s6_addr32[3]) {
 		add_addr(idev, &addr, plen, scope);
-		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags,
+		addrconf_prefix_route(&addr, plen, 0, idev->dev, 0, pflags,
 				      GFP_ATOMIC);
 		return;
 	}
@@ -3081,8 +3087,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)
 				}
 
 				add_addr(idev, &addr, plen, flag);
-				addrconf_prefix_route(&addr, plen, idev->dev, 0,
-						      pflags, GFP_ATOMIC);
+				addrconf_prefix_route(&addr, plen, 0, idev->dev,
+						      0, pflags, GFP_ATOMIC);
 			}
 		}
 	}
@@ -3128,7 +3134,7 @@ void addrconf_add_linklocal(struct inet6_dev *idev,
 
 	ifp = ipv6_add_addr(idev, &cfg, true, NULL);
 	if (!IS_ERR(ifp)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, 0, idev->dev,
 				      0, 0, GFP_ATOMIC);
 		addrconf_dad_start(ifp);
 		in6_ifa_put(ifp);
@@ -3244,7 +3250,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 			addrconf_add_linklocal(idev, &addr,
 					       IFA_F_STABLE_PRIVACY);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev,
+			addrconf_prefix_route(&addr, 64, 0, idev->dev,
 					      0, 0, GFP_KERNEL);
 		break;
 	case IN6_ADDR_GEN_MODE_EUI64:
@@ -3255,7 +3261,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
 			addrconf_add_linklocal(idev, &addr, 0);
 		else if (prefix_route)
-			addrconf_prefix_route(&addr, 64, idev->dev,
+			addrconf_prefix_route(&addr, 64, 0, idev->dev,
 					      0, 0, GFP_KERNEL);
 		break;
 	case IN6_ADDR_GEN_MODE_NONE:
@@ -3375,7 +3381,8 @@ static int fixup_permanent_addr(struct net *net,
 
 	if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
-				      idev->dev, 0, 0, GFP_ATOMIC);
+				      ifp->rt_priority, idev->dev, 0, 0,
+				      GFP_ATOMIC);
 	}
 
 	if (ifp->state == INET6_IFADDR_STATE_PREDAD)
@@ -4495,6 +4502,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {
 	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 	[IFA_FLAGS]		= { .len = sizeof(u32) },
+	[IFA_RT_PRIORITY]	= { .len = sizeof(u32) },
 };
 
 static int
@@ -4527,6 +4535,37 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 			      ifm->ifa_prefixlen);
 }
 
+static int modify_prefix_route(struct inet6_ifaddr *ifp,
+			       unsigned long expires, u32 flags)
+{
+	struct fib6_info *f6i;
+
+	f6i = addrconf_get_prefix_route(&ifp->addr,
+					ifp->prefix_len,
+					ifp->idev->dev,
+					0, RTF_GATEWAY | RTF_DEFAULT);
+	if (!f6i)
+		return -ENOENT;
+
+	if (f6i->fib6_metric != ifp->rt_priority) {
+		/* add new one */
+		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+				      ifp->rt_priority, ifp->idev->dev,
+				      expires, flags, GFP_KERNEL);
+		/* delete old one */
+		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+	} else {
+		if (!expires)
+			fib6_clean_expires(f6i);
+		else
+			fib6_set_expires(f6i, expires);
+
+		fib6_info_release(f6i);
+	}
+
+	return 0;
+}
+
 static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
 {
 	u32 flags;
@@ -4577,14 +4616,25 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
 	ifp->valid_lft = cfg->valid_lft;
 	ifp->prefered_lft = cfg->preferred_lft;
 
+	if (cfg->rt_priority && cfg->rt_priority != ifp->rt_priority)
+		ifp->rt_priority = cfg->rt_priority;
+
 	spin_unlock_bh(&ifp->lock);
 	if (!(ifp->flags&IFA_F_TENTATIVE))
 		ipv6_ifa_notify(0, ifp);
 
 	if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
-				      ifp->idev->dev, expires, flags,
-				      GFP_KERNEL);
+		int rc = -ENOENT;
+
+		if (had_prefixroute)
+			rc = modify_prefix_route(ifp, expires, flags);
+
+		/* prefix route could have been deleted; if so restore it */
+		if (rc == -ENOENT) {
+			addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
+					      ifp->rt_priority, ifp->idev->dev,
+					      expires, flags, GFP_KERNEL);
+		}
 	} else if (had_prefixroute) {
 		enum cleanup_prefix_rt_t action;
 		unsigned long rt_expires;
@@ -4643,6 +4693,9 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 
 	cfg.peer_pfx = peer_pfx;
 	cfg.plen = ifm->ifa_prefixlen;
+	if (tb[IFA_RT_PRIORITY])
+		cfg.rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
 	cfg.valid_lft = INFINITY_LIFE_TIME;
 	cfg.preferred_lft = INFINITY_LIFE_TIME;
 
@@ -4745,7 +4798,8 @@ static inline int inet6_ifaddr_msgsize(void)
 	       + nla_total_size(16) /* IFA_LOCAL */
 	       + nla_total_size(16) /* IFA_ADDRESS */
 	       + nla_total_size(sizeof(struct ifa_cacheinfo))
-	       + nla_total_size(4)  /* IFA_FLAGS */;
+	       + nla_total_size(4)  /* IFA_FLAGS */
+	       + nla_total_size(4)  /* IFA_RT_PRIORITY */;
 }
 
 static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
@@ -4791,6 +4845,10 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,
 		if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0)
 			goto error;
 
+	if (ifa->rt_priority &&
+	    nla_put_u32(skb, IFA_RT_PRIORITY, ifa->rt_priority))
+		goto error;
+
 	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)
 		goto error;
 
@@ -5635,7 +5693,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 		if (ifp->idev->cnf.forwarding)
 			addrconf_join_anycast(ifp);
 		if (!ipv6_addr_any(&ifp->peer_addr))
-			addrconf_prefix_route(&ifp->peer_addr, 128,
+			addrconf_prefix_route(&ifp->peer_addr, 128, 0,
 					      ifp->idev->dev, 0, 0,
 					      GFP_ATOMIC);
 		break;
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 7/9] net/ipv4: Add support for specifying metric of connected routes
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Add support for IFA_RT_PRIORITY to ipv4 addresses.

If the metric is changed on an existing address then the new route
is inserted before removing the old one. Since the metric is one
of the route keys, the prefix route can not be replaced.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/linux/inetdevice.h |  1 +
 include/net/route.h        |  1 +
 net/ipv4/devinet.c         | 14 ++++++++++++
 net/ipv4/fib_frontend.c    | 56 +++++++++++++++++++++++++++++++++++++---------
 4 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index e16fe7d44a71..27650f1bff3d 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -139,6 +139,7 @@ struct in_ifaddr {
 	__be32			ifa_local;
 	__be32			ifa_address;
 	__be32			ifa_mask;
+	__u32			ifa_rt_priority;
 	__be32			ifa_broadcast;
 	unsigned char		ifa_scope;
 	unsigned char		ifa_prefixlen;
diff --git a/include/net/route.h b/include/net/route.h
index dbb032d5921b..bb53cdba38dc 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -225,6 +225,7 @@ struct rtable *rt_dst_alloc(struct net_device *dev,
 struct in_ifaddr;
 void fib_add_ifaddr(struct in_ifaddr *);
 void fib_del_ifaddr(struct in_ifaddr *, struct in_ifaddr *);
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric);
 
 void rt_add_uncached_list(struct rtable *rt);
 void rt_del_uncached_list(struct rtable *rt);
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 40f001782c1b..c5f08c11ded0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -99,6 +99,7 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
 	[IFA_LABEL]     	= { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
 	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) },
 	[IFA_FLAGS]		= { .type = NLA_U32 },
+	[IFA_RT_PRIORITY]	= { .type = NLA_U32 },
 };
 
 #define IN4_ADDR_HSIZE_SHIFT	8
@@ -835,6 +836,9 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
 	else
 		memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
 
+	if (tb[IFA_RT_PRIORITY])
+		ifa->ifa_rt_priority = nla_get_u32(tb[IFA_RT_PRIORITY]);
+
 	if (tb[IFA_CACHEINFO]) {
 		struct ifa_cacheinfo *ci;
 
@@ -906,12 +910,19 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid,
 					 extack);
 	} else {
+		u32 new_metric = ifa->ifa_rt_priority;
+
 		inet_free_ifa(ifa);
 
 		if (nlh->nlmsg_flags & NLM_F_EXCL ||
 		    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 			return -EEXIST;
 		ifa = ifa_existing;
+
+		if (ifa->ifa_rt_priority != new_metric)
+			fib_modify_prefix_metric(ifa, new_metric);
+		ifa->ifa_rt_priority = new_metric;
+
 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
 		cancel_delayed_work(&check_lifetime_work);
 		queue_delayed_work(system_power_efficient_wq,
@@ -1549,6 +1560,7 @@ static size_t inet_nlmsg_size(void)
 	       + nla_total_size(4) /* IFA_BROADCAST */
 	       + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
 	       + nla_total_size(4)  /* IFA_FLAGS */
+	       + nla_total_size(4)  /* IFA_RT_PRIORITY */
 	       + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
 }
 
@@ -1618,6 +1630,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
 	    (ifa->ifa_label[0] &&
 	     nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) ||
 	    nla_put_u32(skb, IFA_FLAGS, ifa->ifa_flags) ||
+	    (ifa->ifa_rt_priority &&
+	     nla_put_u32(skb, IFA_RT_PRIORITY, ifa->ifa_rt_priority)) ||
 	    put_cacheinfo(skb, ifa->ifa_cstamp, ifa->ifa_tstamp,
 			  preferred, valid))
 		goto nla_put_failure;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 897ae92dff0f..76adbbe24173 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -848,7 +848,8 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
  * to fib engine. It is legal, because all events occur
  * only when netlink is already locked.
  */
-static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa)
+static void fib_magic(int cmd, int type, __be32 dst, int dst_len,
+		      struct in_ifaddr *ifa, u32 rt_priority)
 {
 	struct net *net = dev_net(ifa->ifa_dev->dev);
 	u32 tb_id = l3mdev_fib_table(ifa->ifa_dev->dev);
@@ -858,6 +859,7 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad
 		.fc_type = type,
 		.fc_dst = dst,
 		.fc_dst_len = dst_len,
+		.fc_priority = rt_priority,
 		.fc_prefsrc = ifa->ifa_local,
 		.fc_oif = ifa->ifa_dev->dev->ifindex,
 		.fc_nlflags = NLM_F_CREATE | NLM_F_APPEND,
@@ -903,31 +905,60 @@ void fib_add_ifaddr(struct in_ifaddr *ifa)
 		}
 	}
 
-	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim);
+	fib_magic(RTM_NEWROUTE, RTN_LOCAL, addr, 32, prim, 0);
 
 	if (!(dev->flags & IFF_UP))
 		return;
 
 	/* Add broadcast address, if it is explicitly assigned. */
 	if (ifa->ifa_broadcast && ifa->ifa_broadcast != htonl(0xFFFFFFFF))
-		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+		fib_magic(RTM_NEWROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+			  prim, 0);
 
 	if (!ipv4_is_zeronet(prefix) && !(ifa->ifa_flags & IFA_F_SECONDARY) &&
 	    (prefix != addr || ifa->ifa_prefixlen < 32)) {
 		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
 			fib_magic(RTM_NEWROUTE,
 				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-				  prefix, ifa->ifa_prefixlen, prim);
+				  prefix, ifa->ifa_prefixlen, prim,
+				  ifa->ifa_rt_priority);
 
 		/* Add network specific broadcasts, when it takes a sense */
 		if (ifa->ifa_prefixlen < 31) {
-			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32, prim);
+			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix, 32,
+				  prim, 0);
 			fib_magic(RTM_NEWROUTE, RTN_BROADCAST, prefix | ~mask,
-				  32, prim);
+				  32, prim, 0);
 		}
 	}
 }
 
+void fib_modify_prefix_metric(struct in_ifaddr *ifa, u32 new_metric)
+{
+	struct in_device *in_dev = ifa->ifa_dev;
+	struct net_device *dev = in_dev->dev;
+	struct in_ifaddr *prim = ifa;
+	__be32 mask = ifa->ifa_mask;
+	__be32 addr = ifa->ifa_local;
+	__be32 prefix = ifa->ifa_address & mask;
+
+	if (!(dev->flags & IFF_UP) ||
+	    ifa->ifa_flags & (IFA_F_SECONDARY | IFA_F_NOPREFIXROUTE) ||
+	    ipv4_is_zeronet(prefix) ||
+	    prefix == addr || ifa->ifa_prefixlen == 32)
+		return;
+
+	/* add the new */
+	fib_magic(RTM_NEWROUTE,
+		  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+		  prefix, ifa->ifa_prefixlen, prim, new_metric);
+
+	/* delete the old */
+	fib_magic(RTM_DELROUTE,
+		  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
+		  prefix, ifa->ifa_prefixlen, prim, ifa->ifa_rt_priority);
+}
+
 /* Delete primary or secondary address.
  * Optionally, on secondary address promotion consider the addresses
  * from subnet iprim as deleted, even if they are in device list.
@@ -969,7 +1000,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 		if (!(ifa->ifa_flags & IFA_F_NOPREFIXROUTE))
 			fib_magic(RTM_DELROUTE,
 				  dev->flags & IFF_LOOPBACK ? RTN_LOCAL : RTN_UNICAST,
-				  any, ifa->ifa_prefixlen, prim);
+				  any, ifa->ifa_prefixlen, prim, 0);
 		subnet = 1;
 	}
 
@@ -1053,17 +1084,20 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
 
 no_promotions:
 	if (!(ok & BRD_OK))
-		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
+		fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32,
+			  prim, 0);
 	if (subnet && ifa->ifa_prefixlen < 31) {
 		if (!(ok & BRD1_OK))
-			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32, prim);
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, brd, 32,
+				  prim, 0);
 		if (!(ok & BRD0_OK))
-			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32, prim);
+			fib_magic(RTM_DELROUTE, RTN_BROADCAST, any, 32,
+				  prim, 0);
 	}
 	if (!(ok & LOCAL_OK)) {
 		unsigned int addr_type;
 
-		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim);
+		fib_magic(RTM_DELROUTE, RTN_LOCAL, ifa->ifa_local, 32, prim, 0);
 
 		/* Check, that this local address finally disappeared. */
 		addr_type = inet_addr_type_dev_table(dev_net(dev), dev,
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 6/9] net: Add IFA_RT_PRIORITY address attribute
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Currently, if two interfaces have addresses in the same connected route,
then the order of the prefix route entries is determined by the order in
which the addresses are assigned or the links brought up.

Add IFA_RT_PRIORITY to allow user to specify the metric of the prefix
route associated with an address giving interface managers better
control of the order of prefix routes.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/uapi/linux/if_addr.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index 2ef053d265de..ebaf5701c9db 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -33,6 +33,7 @@ enum {
 	IFA_CACHEINFO,
 	IFA_MULTICAST,
 	IFA_FLAGS,
+	IFA_RT_PRIORITY,  /* u32, priority/metric for prefix route */
 	__IFA_MAX,
 };

-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 5/9] net/ipv6: Pass ifa6_config struct to inet6_addr_modify
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Update inet6_addr_modify to take ifa6_config.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/addrconf.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 2db1acf70610..1b1e63a4520b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4527,8 +4527,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh,
 			      ifm->ifa_prefixlen);
 }
 
-static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
-			     u32 prefered_lft, u32 valid_lft)
+static int inet6_addr_modify(struct inet6_ifaddr *ifp, struct ifa6_config *cfg)
 {
 	u32 flags;
 	clock_t expires;
@@ -4538,32 +4537,32 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 
 	ASSERT_RTNL();
 
-	if (!valid_lft || (prefered_lft > valid_lft))
+	if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
 		return -EINVAL;
 
-	if (ifa_flags & IFA_F_MANAGETEMPADDR &&
+	if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR &&
 	    (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
 		return -EINVAL;
 
 	if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
-		ifa_flags &= ~IFA_F_OPTIMISTIC;
+		cfg->ifa_flags &= ~IFA_F_OPTIMISTIC;
 
-	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
 		expires = jiffies_to_clock_t(timeout * HZ);
-		valid_lft = timeout;
+		cfg->valid_lft = timeout;
 		flags = RTF_EXPIRES;
 	} else {
 		expires = 0;
 		flags = 0;
-		ifa_flags |= IFA_F_PERMANENT;
+		cfg->ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
 		if (timeout == 0)
-			ifa_flags |= IFA_F_DEPRECATED;
-		prefered_lft = timeout;
+			cfg->ifa_flags |= IFA_F_DEPRECATED;
+		cfg->preferred_lft = timeout;
 	}
 
 	spin_lock_bh(&ifp->lock);
@@ -4573,16 +4572,16 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD |
 			IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
 			IFA_F_NOPREFIXROUTE);
-	ifp->flags |= ifa_flags;
+	ifp->flags |= cfg->ifa_flags;
 	ifp->tstamp = jiffies;
-	ifp->valid_lft = valid_lft;
-	ifp->prefered_lft = prefered_lft;
+	ifp->valid_lft = cfg->valid_lft;
+	ifp->prefered_lft = cfg->preferred_lft;
 
 	spin_unlock_bh(&ifp->lock);
 	if (!(ifp->flags&IFA_F_TENTATIVE))
 		ipv6_ifa_notify(0, ifp);
 
-	if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+	if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len,
 				      ifp->idev->dev, expires, flags,
 				      GFP_KERNEL);
@@ -4601,10 +4600,14 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
 	}
 
 	if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) {
-		if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR))
-			valid_lft = prefered_lft = 0;
-		manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft,
-				 !was_managetempaddr, jiffies);
+		if (was_managetempaddr &&
+		    !(ifp->flags & IFA_F_MANAGETEMPADDR)) {
+			cfg->valid_lft = 0;
+			cfg->preferred_lft = 0;
+		}
+		manage_tempaddrs(ifp->idev, ifp, cfg->valid_lft,
+				 cfg->preferred_lft, !was_managetempaddr,
+				 jiffies);
 	}
 
 	addrconf_verify_rtnl();
@@ -4691,8 +4694,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 		err = -EEXIST;
 	else
-		err = inet6_addr_modify(ifa, cfg.ifa_flags, cfg.preferred_lft,
-					cfg.valid_lft);
+		err = inet6_addr_modify(ifa, &cfg);
 
 	in6_ifa_put(ifa);
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 2/9] net/ipv6: Convert ipv6_add_addr to struct ifa6_config
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Convert ipv6_add_addr from a list of address parameters to use the
new ifa6_config struct. Mapping of variable changes:

    addr      -->  cfg->pfx
    peer_addr -->  cfg->peer_pfx
    pfxlen    -->  cfg->plen
    flags     -->  cfg->ifa_flags

scope, valid_lft, prefered_lft have the same names within cfg
(with corrected spelling).

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/addrconf.c | 118 +++++++++++++++++++++++++++++++---------------------
 1 file changed, 71 insertions(+), 47 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fbfd71a2d9c8..eff925b2064e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -986,17 +986,15 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
 /* On success it returns ifp with increased reference count */
 
 static struct inet6_ifaddr *
-ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
-	      const struct in6_addr *peer_addr, int pfxlen,
-	      int scope, u32 flags, u32 valid_lft, u32 prefered_lft,
+ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
 	      bool can_block, struct netlink_ext_ack *extack)
 {
 	gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC;
+	int addr_type = ipv6_addr_type(cfg->pfx);
 	struct net *net = dev_net(idev->dev);
 	struct inet6_ifaddr *ifa = NULL;
 	struct fib6_info *f6i = NULL;
 	int err = 0;
-	int addr_type = ipv6_addr_type(addr);
 
 	if (addr_type == IPV6_ADDR_ANY ||
 	    addr_type & IPV6_ADDR_MULTICAST ||
@@ -1019,7 +1017,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 	 */
 	if (can_block) {
 		struct in6_validator_info i6vi = {
-			.i6vi_addr = *addr,
+			.i6vi_addr = *cfg->pfx,
 			.i6vi_dev = idev,
 			.extack = extack,
 		};
@@ -1036,7 +1034,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		goto out;
 	}
 
-	f6i = addrconf_f6i_alloc(net, idev, addr, false, gfp_flags);
+	f6i = addrconf_f6i_alloc(net, idev, cfg->pfx, false, gfp_flags);
 	if (IS_ERR(f6i)) {
 		err = PTR_ERR(f6i);
 		f6i = NULL;
@@ -1049,21 +1047,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 
 	neigh_parms_data_state_setall(idev->nd_parms);
 
-	ifa->addr = *addr;
-	if (peer_addr)
-		ifa->peer_addr = *peer_addr;
+	ifa->addr = *cfg->pfx;
+	if (cfg->peer_pfx)
+		ifa->peer_addr = *cfg->peer_pfx;
 
 	spin_lock_init(&ifa->lock);
 	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
 	INIT_HLIST_NODE(&ifa->addr_lst);
-	ifa->scope = scope;
-	ifa->prefix_len = pfxlen;
-	ifa->flags = flags;
+	ifa->scope = cfg->scope;
+	ifa->prefix_len = cfg->plen;
+	ifa->flags = cfg->ifa_flags;
 	/* No need to add the TENTATIVE flag for addresses with NODAD */
-	if (!(flags & IFA_F_NODAD))
+	if (!(cfg->ifa_flags & IFA_F_NODAD))
 		ifa->flags |= IFA_F_TENTATIVE;
-	ifa->valid_lft = valid_lft;
-	ifa->prefered_lft = prefered_lft;
+	ifa->valid_lft = cfg->valid_lft;
+	ifa->prefered_lft = cfg->preferred_lft;
 	ifa->cstamp = ifa->tstamp = jiffies;
 	ifa->tokenized = false;
 
@@ -1262,6 +1260,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 	struct in6_addr addr, *tmpaddr;
 	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
 	unsigned long regen_advance;
+	struct ifa6_config cfg;
 	int tmp_plen;
 	int ret = 0;
 	u32 addr_flags;
@@ -1358,9 +1357,15 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 	if (ifp->flags & IFA_F_OPTIMISTIC)
 		addr_flags |= IFA_F_OPTIMISTIC;
 
-	ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen,
-			    ipv6_addr_scope(&addr), addr_flags,
-			    tmp_valid_lft, tmp_prefered_lft, block, NULL);
+	cfg.pfx = &addr;
+	cfg.plen = tmp_plen;
+	cfg.peer_pfx = NULL;
+	cfg.ifa_flags = addr_flags;
+	cfg.preferred_lft = tmp_prefered_lft;
+	cfg.valid_lft = tmp_valid_lft;
+	cfg.scope = ipv6_addr_scope(cfg.pfx);
+
+	ift = ipv6_add_addr(idev, &cfg, block, NULL);
 	if (IS_ERR(ift)) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
@@ -2031,13 +2036,17 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
 	spin_lock_bh(&ifp->lock);
 
 	if (ifp->flags & IFA_F_STABLE_PRIVACY) {
-		int scope = ifp->scope;
-		u32 flags = ifp->flags;
 		struct in6_addr new_addr;
 		struct inet6_ifaddr *ifp2;
-		u32 valid_lft, preferred_lft;
-		int pfxlen = ifp->prefix_len;
 		int retries = ifp->stable_privacy_retry + 1;
+		struct ifa6_config cfg = {
+			.pfx = &new_addr,
+			.plen = ifp->prefix_len,
+			.ifa_flags = ifp->flags,
+			.valid_lft = ifp->valid_lft,
+			.preferred_lft = ifp->prefered_lft,
+			.scope = ifp->scope,
+		};
 
 		if (retries > net->ipv6.sysctl.idgen_retries) {
 			net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n",
@@ -2050,9 +2059,6 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
 						 idev))
 			goto errdad;
 
-		valid_lft = ifp->valid_lft;
-		preferred_lft = ifp->prefered_lft;
-
 		spin_unlock_bh(&ifp->lock);
 
 		if (idev->cnf.max_addresses &&
@@ -2063,9 +2069,7 @@ void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp)
 		net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n",
 				     ifp->idev->dev->name);
 
-		ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen,
-				     scope, flags, valid_lft,
-				     preferred_lft, false, NULL);
+		ifp2 = ipv6_add_addr(idev, &cfg, false, NULL);
 		if (IS_ERR(ifp2))
 			goto lock_errdad;
 
@@ -2507,12 +2511,20 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 
 	if (!ifp && valid_lft) {
 		int max_addresses = in6_dev->cnf.max_addresses;
+		struct ifa6_config cfg = {
+			.pfx = addr,
+			.plen = pinfo->prefix_len,
+			.ifa_flags = addr_flags,
+			.valid_lft = valid_lft,
+			.preferred_lft = prefered_lft,
+			.scope = addr_type & IPV6_ADDR_SCOPE_MASK,
+		};
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 		if ((net->ipv6.devconf_all->optimistic_dad ||
 		     in6_dev->cnf.optimistic_dad) &&
 		    !net->ipv6.devconf_all->forwarding && sllao)
-			addr_flags |= IFA_F_OPTIMISTIC;
+			cfg.ifa_flags |= IFA_F_OPTIMISTIC;
 #endif
 
 		/* Do not allow to create too much of autoconfigured
@@ -2520,11 +2532,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		 */
 		if (!max_addresses ||
 		    ipv6_count_addresses(in6_dev) < max_addresses)
-			ifp = ipv6_add_addr(in6_dev, addr, NULL,
-					    pinfo->prefix_len,
-					    addr_type&IPV6_ADDR_SCOPE_MASK,
-					    addr_flags, valid_lft,
-					    prefered_lft, false, NULL);
+			ifp = ipv6_add_addr(in6_dev, &cfg, false, NULL);
 
 		if (IS_ERR_OR_NULL(ifp))
 			return -1;
@@ -2836,12 +2844,19 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			  __u32 prefered_lft, __u32 valid_lft,
 			  struct netlink_ext_ack *extack)
 {
+	struct ifa6_config cfg = {
+		.pfx = pfx,
+		.plen = plen,
+		.peer_pfx = peer_pfx,
+		.ifa_flags = ifa_flags,
+		.preferred_lft = prefered_lft,
+		.valid_lft = valid_lft,
+	};
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
 	unsigned long timeout;
 	clock_t expires;
-	int scope;
 	u32 flags;
 
 	ASSERT_RTNL();
@@ -2872,7 +2887,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 			return ret;
 	}
 
-	scope = ipv6_addr_scope(pfx);
+	cfg.scope = ipv6_addr_scope(pfx);
 
 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
@@ -2892,9 +2907,7 @@ static int inet6_addr_add(struct net *net, int ifindex,
 		prefered_lft = timeout;
 	}
 
-	ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags,
-			    valid_lft, prefered_lft, true, extack);
-
+	ifp = ipv6_add_addr(idev, &cfg, true, extack);
 	if (!IS_ERR(ifp)) {
 		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
 			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
@@ -3010,11 +3023,16 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
 		     int plen, int scope)
 {
 	struct inet6_ifaddr *ifp;
+	struct ifa6_config cfg = {
+		.pfx = addr,
+		.plen = plen,
+		.ifa_flags = IFA_F_PERMANENT,
+		.valid_lft = INFINITY_LIFE_TIME,
+		.preferred_lft = INFINITY_LIFE_TIME,
+		.scope = scope
+	};
 
-	ifp = ipv6_add_addr(idev, addr, NULL, plen,
-			    scope, IFA_F_PERMANENT,
-			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME,
-			    true, NULL);
+	ifp = ipv6_add_addr(idev, &cfg, true, NULL);
 	if (!IS_ERR(ifp)) {
 		spin_lock_bh(&ifp->lock);
 		ifp->flags &= ~IFA_F_TENTATIVE;
@@ -3104,18 +3122,24 @@ static void init_loopback(struct net_device *dev)
 void addrconf_add_linklocal(struct inet6_dev *idev,
 			    const struct in6_addr *addr, u32 flags)
 {
+	struct ifa6_config cfg = {
+		.pfx = addr,
+		.plen = 64,
+		.ifa_flags = flags | IFA_F_PERMANENT,
+		.valid_lft = INFINITY_LIFE_TIME,
+		.preferred_lft = INFINITY_LIFE_TIME,
+		.scope = IFA_LINK
+	};
 	struct inet6_ifaddr *ifp;
-	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad ||
 	     idev->cnf.optimistic_dad) &&
 	    !dev_net(idev->dev)->ipv6.devconf_all->forwarding)
-		addr_flags |= IFA_F_OPTIMISTIC;
+		cfg.ifa_flags |= IFA_F_OPTIMISTIC;
 #endif
 
-	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
-			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL);
+	ifp = ipv6_add_addr(idev, &cfg, true, NULL);
 	if (!IS_ERR(ifp)) {
 		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev,
 				      0, 0, GFP_ATOMIC);
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 3/9] net/ipv6: Remove temp variables in ipv6_create_tempaddr
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Remove temp variables in favor of ifa6_config struct.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/addrconf.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index eff925b2064e..4988f2265882 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1258,12 +1258,10 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 {
 	struct inet6_dev *idev = ifp->idev;
 	struct in6_addr addr, *tmpaddr;
-	unsigned long tmp_prefered_lft, tmp_valid_lft, tmp_tstamp, age;
+	unsigned long tmp_tstamp, age;
 	unsigned long regen_advance;
 	struct ifa6_config cfg;
-	int tmp_plen;
 	int ret = 0;
-	u32 addr_flags;
 	unsigned long now = jiffies;
 	long max_desync_factor;
 	s32 cnf_temp_preferred_lft;
@@ -1325,13 +1323,12 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 		}
 	}
 
-	tmp_valid_lft = min_t(__u32,
-			      ifp->valid_lft,
+	cfg.valid_lft = min_t(__u32, ifp->valid_lft,
 			      idev->cnf.temp_valid_lft + age);
-	tmp_prefered_lft = cnf_temp_preferred_lft + age -
-			    idev->desync_factor;
-	tmp_prefered_lft = min_t(__u32, ifp->prefered_lft, tmp_prefered_lft);
-	tmp_plen = ifp->prefix_len;
+	cfg.preferred_lft = cnf_temp_preferred_lft + age - idev->desync_factor;
+	cfg.preferred_lft = min_t(__u32, ifp->prefered_lft, cfg.preferred_lft);
+
+	cfg.plen = ifp->prefix_len;
 	tmp_tstamp = ifp->tstamp;
 	spin_unlock_bh(&ifp->lock);
 
@@ -1345,24 +1342,19 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
 	 * temporary addresses being generated.
 	 */
 	age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
-	if (tmp_prefered_lft <= regen_advance + age) {
+	if (cfg.preferred_lft <= regen_advance + age) {
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
 		ret = -1;
 		goto out;
 	}
 
-	addr_flags = IFA_F_TEMPORARY;
+	cfg.ifa_flags = IFA_F_TEMPORARY;
 	/* set in addrconf_prefix_rcv() */
 	if (ifp->flags & IFA_F_OPTIMISTIC)
-		addr_flags |= IFA_F_OPTIMISTIC;
+		cfg.ifa_flags |= IFA_F_OPTIMISTIC;
 
 	cfg.pfx = &addr;
-	cfg.plen = tmp_plen;
-	cfg.peer_pfx = NULL;
-	cfg.ifa_flags = addr_flags;
-	cfg.preferred_lft = tmp_prefered_lft;
-	cfg.valid_lft = tmp_valid_lft;
 	cfg.scope = ipv6_addr_scope(cfg.pfx);
 
 	ift = ipv6_add_addr(idev, &cfg, block, NULL);
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 4/9] net/ipv6: Pass ifa6_config struct to inet6_addr_add
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Move the creation of struct ifa6_config up to callers of inet6_addr_add.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 net/ipv6/addrconf.c | 112 ++++++++++++++++++++++++++--------------------------
 1 file changed, 57 insertions(+), 55 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4988f2265882..2db1acf70610 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2830,20 +2830,9 @@ static int ipv6_mc_config(struct sock *sk, bool join,
  *	Manual configuration of address on an interface
  */
 static int inet6_addr_add(struct net *net, int ifindex,
-			  const struct in6_addr *pfx,
-			  const struct in6_addr *peer_pfx,
-			  unsigned int plen, __u32 ifa_flags,
-			  __u32 prefered_lft, __u32 valid_lft,
+			  struct ifa6_config *cfg,
 			  struct netlink_ext_ack *extack)
 {
-	struct ifa6_config cfg = {
-		.pfx = pfx,
-		.plen = plen,
-		.peer_pfx = peer_pfx,
-		.ifa_flags = ifa_flags,
-		.preferred_lft = prefered_lft,
-		.valid_lft = valid_lft,
-	};
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
@@ -2853,14 +2842,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
 
 	ASSERT_RTNL();
 
-	if (plen > 128)
+	if (cfg->plen > 128)
 		return -EINVAL;
 
 	/* check the lifetime */
-	if (!valid_lft || prefered_lft > valid_lft)
+	if (!cfg->valid_lft || cfg->preferred_lft > cfg->valid_lft)
 		return -EINVAL;
 
-	if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64)
+	if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR && cfg->plen != 64)
 		return -EINVAL;
 
 	dev = __dev_get_by_index(net, ifindex);
@@ -2871,37 +2860,37 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	if (IS_ERR(idev))
 		return PTR_ERR(idev);
 
-	if (ifa_flags & IFA_F_MCAUTOJOIN) {
+	if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
 		int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
-					 true, pfx, ifindex);
+					 true, cfg->pfx, ifindex);
 
 		if (ret < 0)
 			return ret;
 	}
 
-	cfg.scope = ipv6_addr_scope(pfx);
+	cfg->scope = ipv6_addr_scope(cfg->pfx);
 
-	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	timeout = addrconf_timeout_fixup(cfg->valid_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
 		expires = jiffies_to_clock_t(timeout * HZ);
-		valid_lft = timeout;
+		cfg->valid_lft = timeout;
 		flags = RTF_EXPIRES;
 	} else {
 		expires = 0;
 		flags = 0;
-		ifa_flags |= IFA_F_PERMANENT;
+		cfg->ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	timeout = addrconf_timeout_fixup(cfg->preferred_lft, HZ);
 	if (addrconf_finite_timeout(timeout)) {
 		if (timeout == 0)
-			ifa_flags |= IFA_F_DEPRECATED;
-		prefered_lft = timeout;
+			cfg->ifa_flags |= IFA_F_DEPRECATED;
+		cfg->preferred_lft = timeout;
 	}
 
-	ifp = ipv6_add_addr(idev, &cfg, true, extack);
+	ifp = ipv6_add_addr(idev, cfg, true, extack);
 	if (!IS_ERR(ifp)) {
-		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) {
+		if (!(cfg->ifa_flags & IFA_F_NOPREFIXROUTE)) {
 			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev,
 					      expires, flags, GFP_KERNEL);
 		}
@@ -2917,15 +2906,15 @@ static int inet6_addr_add(struct net *net, int ifindex,
 		 * manually configured addresses
 		 */
 		addrconf_dad_start(ifp);
-		if (ifa_flags & IFA_F_MANAGETEMPADDR)
-			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft,
-					 true, jiffies);
+		if (cfg->ifa_flags & IFA_F_MANAGETEMPADDR)
+			manage_tempaddrs(idev, ifp, cfg->valid_lft,
+					 cfg->preferred_lft, true, jiffies);
 		in6_ifa_put(ifp);
 		addrconf_verify_rtnl();
 		return 0;
-	} else if (ifa_flags & IFA_F_MCAUTOJOIN) {
-		ipv6_mc_config(net->ipv6.mc_autojoin_sk,
-			       false, pfx, ifindex);
+	} else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) {
+		ipv6_mc_config(net->ipv6.mc_autojoin_sk, false,
+			       cfg->pfx, ifindex);
 	}
 
 	return PTR_ERR(ifp);
@@ -2976,6 +2965,11 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
 
 int addrconf_add_ifaddr(struct net *net, void __user *arg)
 {
+	struct ifa6_config cfg = {
+		.ifa_flags = IFA_F_PERMANENT,
+		.preferred_lft = INFINITY_LIFE_TIME,
+		.valid_lft = INFINITY_LIFE_TIME,
+	};
 	struct in6_ifreq ireq;
 	int err;
 
@@ -2985,10 +2979,11 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
 		return -EFAULT;
 
+	cfg.pfx = &ireq.ifr6_addr;
+	cfg.plen = ireq.ifr6_prefixlen;
+
 	rtnl_lock();
-	err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL,
-			     ireq.ifr6_prefixlen, IFA_F_PERMANENT,
-			     INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL);
+	err = inet6_addr_add(net, ireq.ifr6_ifindex, &cfg, NULL);
 	rtnl_unlock();
 	return err;
 }
@@ -4624,12 +4619,11 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct net *net = sock_net(skb->sk);
 	struct ifaddrmsg *ifm;
 	struct nlattr *tb[IFA_MAX+1];
-	struct in6_addr *pfx, *peer_pfx;
+	struct in6_addr *peer_pfx;
 	struct inet6_ifaddr *ifa;
 	struct net_device *dev;
 	struct inet6_dev *idev;
-	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
-	u32 ifa_flags;
+	struct ifa6_config cfg;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy,
@@ -4637,60 +4631,68 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (err < 0)
 		return err;
 
+	memset(&cfg, 0, sizeof(cfg));
+
 	ifm = nlmsg_data(nlh);
-	pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
-	if (!pfx)
+	cfg.pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx);
+	if (!cfg.pfx)
 		return -EINVAL;
 
+	cfg.peer_pfx = peer_pfx;
+	cfg.plen = ifm->ifa_prefixlen;
+	cfg.valid_lft = INFINITY_LIFE_TIME;
+	cfg.preferred_lft = INFINITY_LIFE_TIME;
+
 	if (tb[IFA_CACHEINFO]) {
 		struct ifa_cacheinfo *ci;
 
 		ci = nla_data(tb[IFA_CACHEINFO]);
-		valid_lft = ci->ifa_valid;
-		preferred_lft = ci->ifa_prefered;
-	} else {
-		preferred_lft = INFINITY_LIFE_TIME;
-		valid_lft = INFINITY_LIFE_TIME;
+		cfg.valid_lft = ci->ifa_valid;
+		cfg.preferred_lft = ci->ifa_prefered;
 	}
 
 	dev =  __dev_get_by_index(net, ifm->ifa_index);
 	if (!dev)
 		return -ENODEV;
 
-	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags;
+	if (tb[IFA_FLAGS])
+		cfg.ifa_flags = nla_get_u32(tb[IFA_FLAGS]);
+	else
+		cfg.ifa_flags = ifm->ifa_flags;
 
 	/* We ignore other flags so far. */
-	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-		     IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+	cfg.ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS |
+			 IFA_F_MANAGETEMPADDR | IFA_F_NOPREFIXROUTE |
+			 IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
 
 	idev = ipv6_find_idev(dev);
 	if (IS_ERR(idev))
 		return PTR_ERR(idev);
 
 	if (!ipv6_allow_optimistic_dad(net, idev))
-		ifa_flags &= ~IFA_F_OPTIMISTIC;
+		cfg.ifa_flags &= ~IFA_F_OPTIMISTIC;
 
-	if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+	if (cfg.ifa_flags & IFA_F_NODAD &&
+	    cfg.ifa_flags & IFA_F_OPTIMISTIC) {
 		NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
 		return -EINVAL;
 	}
 
-	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
+	ifa = ipv6_get_ifaddr(net, cfg.pfx, dev, 1);
 	if (!ifa) {
 		/*
 		 * It would be best to check for !NLM_F_CREATE here but
 		 * userspace already relies on not having to provide this.
 		 */
-		return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,
-				      ifm->ifa_prefixlen, ifa_flags,
-				      preferred_lft, valid_lft, extack);
+		return inet6_addr_add(net, ifm->ifa_index, &cfg, extack);
 	}
 
 	if (nlh->nlmsg_flags & NLM_F_EXCL ||
 	    !(nlh->nlmsg_flags & NLM_F_REPLACE))
 		err = -EEXIST;
 	else
-		err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft);
+		err = inet6_addr_modify(ifa, cfg.ifa_flags, cfg.preferred_lft,
+					cfg.valid_lft);
 
 	in6_ifa_put(ifa);
 
-- 
2.11.0

^ permalink raw reply related

* [PATCH RFC net-next 0/9] net: Add address attribute to control metric of prefix route
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern

From: David Ahern <dsahern@gmail.com>

For use cases such as VRR (Virtual Router Redundancy) interface managers
want efficient control over the order of prefix routes when multiple
interfaces have addresses with overlapping/duplicate subnets.

Currently, if two interfaces have addresses in the same subnet, the order
of the prefix route entries is determined by the order in which the
addresses are assigned or the links brought up. Any actions like cycling
an interface up and down changes that order. This set adds a new attribute
for addresses to allow a user to specify the metric of the prefix route
associated with an address giving interface managers better and more
efficient control of the order of prefix routes.

Patches 1-5 refactor IPv6 address add functions to pass an ifa6_config
struct. The functions currently have a long list of arguments and adding
the metric just makes it worse. Because of the overall diff size in
moving the arguments to a struct, the change is done in changes to make
it easier to review starting with the bottom function and pushing the
struct up to callers in each successive patch.

Patch 6 introduces the new attribute.

Patches 7 and 8 add support for the new attribute to IPv4 and IPv6
addresses.

Patch 9 adds a set of test cases.

David Ahern (9):
  net/ipv6: Introduce ifa6_config struct
  net/ipv6: Convert ipv6_add_addr to struct ifa6_config
  net/ipv6: Remove temp variables in ipv6_create_tempaddr
  net/ipv6: Pass ifa6_config struct to inet6_addr_add
  net/ipv6: Pass ifa6_config struct to inet6_addr_modify
  net: Add IFA_RT_PRIORITY address attribute
  net/ipv4: Add support for specifying metric of connected routes
  net/ipv6: Add support for specifying metric of connected routes
  selftests: fib_tests: Add prefix route tests with metric

 include/linux/inetdevice.h               |   1 +
 include/net/addrconf.h                   |  13 ++
 include/net/if_inet6.h                   |   1 +
 include/net/route.h                      |   1 +
 include/uapi/linux/if_addr.h             |   1 +
 net/ipv4/devinet.c                       |  14 ++
 net/ipv4/fib_frontend.c                  |  56 ++++-
 net/ipv6/addrconf.c                      | 360 +++++++++++++++++++------------
 tools/testing/selftests/net/fib_tests.sh | 181 +++++++++++++++-
 9 files changed, 475 insertions(+), 153 deletions(-)
 mode change 100755 => 100644 tools/testing/selftests/net/fib_tests.sh

-- 
2.11.0

^ permalink raw reply

* [PATCH RFC net-next 1/9] net/ipv6: Introduce ifa6_config struct
From: dsahern @ 2018-05-23 22:57 UTC (permalink / raw)
  To: netdev; +Cc: roopa, David Ahern
In-Reply-To: <20180523225727.11386-1-dsahern@kernel.org>

From: David Ahern <dsahern@gmail.com>

Move config parameters for adding an ipv6 address to a struct. struct
names stem from inet6_rtm_newaddr which is the modern handler for
adding an address.

Signed-off-by: David Ahern <dsahern@gmail.com>
---
 include/net/addrconf.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index ff766ab207e0..1e3ef04176d6 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -59,6 +59,18 @@ struct in6_validator_info {
 	struct netlink_ext_ack	*extack;
 };
 
+struct ifa6_config {
+	const struct in6_addr *pfx;
+	unsigned int plen;
+
+	const struct in6_addr *peer_pfx;
+
+	u32 ifa_flags;
+	u32 preferred_lft;
+	u32 valid_lft;
+	u16 scope;
+};
+
 int addrconf_init(void);
 void addrconf_cleanup(void);
 
-- 
2.11.0

^ permalink raw reply related

* Re: [PATCH net-next v4 0/2] openvswitch: Support conntrack zone limit
From: Pravin Shelar @ 2018-05-23 22:55 UTC (permalink / raw)
  To: David Miller; +Cc: Yi-Hung Wei, Linux Kernel Network Developers
In-Reply-To: <20180523.131349.518306151458208056.davem@davemloft.net>

On Wed, May 23, 2018 at 10:13 AM, David Miller <davem@davemloft.net> wrote:
> From: Yi-Hung Wei <yihung.wei@gmail.com>
> Date: Mon, 21 May 2018 17:16:03 -0700
>
>> v3->v4:
>>   - Addresses comments from Parvin that include simplify netlink API,
>>     and remove unncessary RCU lockings.
>>   - Rebases to master.
>
> Pravin, please review.

I will finish review in few hours.

^ permalink raw reply

* Re: [PATCH v2 net] netfilter: provide correct argument to nla_strlcpy()
From: Pablo Neira Ayuso @ 2018-05-23 22:53 UTC (permalink / raw)
  To: Florian Westphal; +Cc: Eric Dumazet, David S . Miller, netdev, Eric Dumazet
In-Reply-To: <20180522061439.2f2hc34afpn6hgbx@breakpoint.cc>

On Tue, May 22, 2018 at 08:14:39AM +0200, Florian Westphal wrote:
> Eric Dumazet <edumazet@google.com> wrote:
> > Recent patch forgot to remove nla_data(), upsetting syzkaller a bit.
> 
> Duuuh.... Thanks Eric.
> Acked-by: Florian Westphal <fw@strlen.de>

Applied, thanks!

^ permalink raw reply

* Re: [PATCH bpf-next v3 01/15] net: initial AF_XDP skeleton
From: Stephen Hemminger @ 2018-05-23 22:50 UTC (permalink / raw)
  To: Björn Töpel
  Cc: magnus.karlsson, alexander.h.duyck, alexander.duyck,
	john.fastabend, ast, brouer, willemdebruijn.kernel, daniel, mst,
	netdev, Björn Töpel, michael.lundkvist,
	jesse.brandeburg, anjali.singhai, qi.z.zhang
In-Reply-To: <20180502110136.3738-2-bjorn.topel@gmail.com>

On Wed,  2 May 2018 13:01:22 +0200
Björn Töpel <bjorn.topel@gmail.com> wrote:

> diff --git a/net/xdp/Kconfig b/net/xdp/Kconfig
> new file mode 100644
> index 000000000000..90e4a7152854
> --- /dev/null
> +++ b/net/xdp/Kconfig
> @@ -0,0 +1,7 @@
> +config XDP_SOCKETS
> +	bool "XDP sockets"
> +	depends on BPF_SYSCALL
> +	default n
> +	help
> +	  XDP sockets allows a channel between XDP programs and
> +	  userspace applications.

Why is XDP not supported as a module?
Most distributions will want it to be a module so that it is not loaded
unless used, and AF_XDP could be also be disabled by blacklisting the module.

^ permalink raw reply

* Re: [PATCH net-next v15 4/7] sch_cake: Add NAT awareness to packet classifier
From: Pablo Neira Ayuso @ 2018-05-23 22:46 UTC (permalink / raw)
  To: Toke Høiland-Jørgensen; +Cc: netdev, cake, netfilter-devel
In-Reply-To: <87k1rvg4qt.fsf@toke.dk>

On Tue, May 22, 2018 at 04:11:06PM +0200, Toke Høiland-Jørgensen wrote:
> Pablo Neira Ayuso <pablo@netfilter.org> writes:
> 
> > Hi Toke,
> >
> > On Tue, May 22, 2018 at 03:57:38PM +0200, Toke Høiland-Jørgensen wrote:
> >> When CAKE is deployed on a gateway that also performs NAT (which is a
> >> common deployment mode), the host fairness mechanism cannot distinguish
> >> internal hosts from each other, and so fails to work correctly.
> >> 
> >> To fix this, we add an optional NAT awareness mode, which will query the
> >> kernel conntrack mechanism to obtain the pre-NAT addresses for each packet
> >> and use that in the flow and host hashing.
> >> 
> >> When the shaper is enabled and the host is already performing NAT, the cost
> >> of this lookup is negligible. However, in unlimited mode with no NAT being
> >> performed, there is a significant CPU cost at higher bandwidths. For this
> >> reason, the feature is turned off by default.
> >> 
> >> Cc: netfilter-devel@vger.kernel.org
> >> Signed-off-by: Toke Høiland-Jørgensen <toke@toke.dk>
> >> ---
> >>  net/sched/sch_cake.c |   79 ++++++++++++++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 79 insertions(+)
> >> 
> >> diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
> >> index 68ac908470f1..6f7cae705c84 100644
> >> --- a/net/sched/sch_cake.c
> >> +++ b/net/sched/sch_cake.c
> >> @@ -71,6 +71,12 @@
> >>  #include <net/tcp.h>
> >>  #include <net/flow_dissector.h>
> >>  
> >> +#if IS_REACHABLE(CONFIG_NF_CONNTRACK)
> >> +#include <net/netfilter/nf_conntrack_core.h>
> >> +#include <net/netfilter/nf_conntrack_zones.h>
> >> +#include <net/netfilter/nf_conntrack.h>
> >> +#endif
> >> +
> >>  #define CAKE_SET_WAYS (8)
> >>  #define CAKE_MAX_TINS (8)
> >>  #define CAKE_QUEUES (1024)
> >> @@ -516,6 +522,60 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
> >>  	return drop;
> >>  }
> >>  
> >> +#if IS_REACHABLE(CONFIG_NF_CONNTRACK)
> >> +
> >> +static void cake_update_flowkeys(struct flow_keys *keys,
> >> +				 const struct sk_buff *skb)
> >> +{
> >> +	const struct nf_conntrack_tuple *tuple;
> >> +	enum ip_conntrack_info ctinfo;
> >> +	struct nf_conn *ct;
> >> +	bool rev = false;
> >> +
> >> +	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
> >> +		return;
> >> +
> >> +	ct = nf_ct_get(skb, &ctinfo);
> >> +	if (ct) {
> >> +		tuple = nf_ct_tuple(ct, CTINFO2DIR(ctinfo));
> >> +	} else {
> >> +		const struct nf_conntrack_tuple_hash *hash;
> >> +		struct nf_conntrack_tuple srctuple;
> >> +
> >> +		if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
> >> +				       NFPROTO_IPV4, dev_net(skb->dev),
> >> +				       &srctuple))
> >> +			return;
> >> +
> >> +		hash = nf_conntrack_find_get(dev_net(skb->dev),
> >> +					     &nf_ct_zone_dflt,
> >> +					     &srctuple);
> >> +		if (!hash)
> >> +			return;
> >> +
> >> +		rev = true;
> >> +		ct = nf_ct_tuplehash_to_ctrack(hash);
> >> +		tuple = nf_ct_tuple(ct, !hash->tuple.dst.dir);
> >> +	}
> >> +
> >> +	keys->addrs.v4addrs.src = rev ? tuple->dst.u3.ip : tuple->src.u3.ip;
> >> +	keys->addrs.v4addrs.dst = rev ? tuple->src.u3.ip : tuple->dst.u3.ip;
> >> +
> >> +	if (keys->ports.ports) {
> >> +		keys->ports.src = rev ? tuple->dst.u.all : tuple->src.u.all;
> >> +		keys->ports.dst = rev ? tuple->src.u.all : tuple->dst.u.all;
> >> +	}
> >> +	if (rev)
> >> +		nf_ct_put(ct);
> >> +}
> >
> > This is going to pull in the nf_conntrack module, even if you may not
> > want it, as soon as cake is in place.
> 
> Yeah, we are aware of that; we get a moddep on nf_conntrack. Our main
> deployment scenario has been home routers where conntrack is used
> anyway, so this has not been much of an issue. However, if there is a
> way to avoid this, and instead detect at runtime if conntrack is
> available, that would certainly be useful. Is there? :)

Yes, there is.

You place this function in net/netfilter/nf_conntrack_core.c, call it
nf_conntrack_get_tuple() which internally uses a rcu hook for this.
See nf_ct_attach() and ip_ct_attach() in net/netfilter/core.c for
instance.

This allows you to avoid the dependency with nf_conntrack (which would
be only called if the module has been explicitly loaded), which is
what you're searching for.

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox