Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next v3 0/5] virtio-net tx napi
From: David Miller @ 2017-04-25 13:09 UTC (permalink / raw)
  To: willemdebruijn.kernel; +Cc: netdev, virtualization, willemb, mst
In-Reply-To: <20170424174930.82623-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
Date: Mon, 24 Apr 2017 13:49:25 -0400

> Add napi for virtio-net transmit completion processing.

Series applied, thanks.

^ permalink raw reply

* Re: [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
From: Jamal Hadi Salim @ 2017-04-25 13:01 UTC (permalink / raw)
  To: Jiri Pirko; +Cc: davem, xiyou.wangcong, eric.dumazet, netdev
In-Reply-To: <20170425121338.GC1867@nanopsycho.orion>

On 17-04-25 08:13 AM, Jiri Pirko wrote:
> Tue, Apr 25, 2017 at 01:54:06PM CEST, jhs@mojatatu.com wrote:


[..]

>> -#define TCAA_MAX 1
>> +/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
>> + *
>> + * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>> + * actions in a dump. All dump responses will contain the number of actions
>> + * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
>> + *
>> + */
>> +#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
>
> BIT (I think I mentioned this before)
>

You did - but I took it out about two submissions back (per cover
letter) because it is not part of UAPI today. I noticed devlink was
using it but they defined their own variant.
So if I added this, iproute2 doesn't compile. I could fix iproute2
to move it somewhere to a common header then restore this.

>> +#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON
>
> Again, namespace please. "TCA_ROOT_FLAGS_VALID"

Good point.

> Why is this UAPI?
>

Should not be. I will fix in next update.


>
>>
>> /* New extended info filters for IFLA_EXT_MASK */
>> #define RTEXT_FILTER_VF		(1 << 0)
>> diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>> index 9ce22b7..2756213 100644
>> --- a/net/sched/act_api.c
>> +++ b/net/sched/act_api.c
>> @@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 			   struct netlink_callback *cb)
>> {
>> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>> +	u32 act_flags = cb->args[2];
>> 	struct nlattr *nest;
>>
>> 	spin_lock_bh(&hinfo->lock);
>> @@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
>> 			}
>> 			nla_nest_end(skb, nest);
>> 			n_i++;
>> -			if (n_i >= TCA_ACT_MAX_PRIO)
>> +			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
>> +			    n_i >= TCA_ACT_MAX_PRIO)
>> 				goto done;
>> 		}
>> 	}
>> done:
>> 	spin_unlock_bh(&hinfo->lock);
>> -	if (n_i)
>> +	if (n_i) {
>> 		cb->args[0] += n_i;
>> +		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
>> +			cb->args[1] = n_i;
>> +	}
>> 	return n_i;
>>
>> nla_put_failure:
>> @@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
>> 	return tcf_add_notify(net, n, &actions, portid);
>> }
>>
>> +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
>> +	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
>> +};
>> +
>> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 			 struct netlink_ext_ack *extack)
>> {
>> 	struct net *net = sock_net(skb->sk);
>> -	struct nlattr *tca[TCAA_MAX + 1];
>> +	struct nlattr *tca[TCA_ROOT_MAX + 1];
>> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
>> 	int ret = 0, ovr = 0;
>>
>> @@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 	    !netlink_capable(skb, CAP_NET_ADMIN))
>> 		return -EPERM;
>>
>> -	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>> +	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
>> 			  extack);
>> 	if (ret < 0)
>> 		return ret;
>> @@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
>> 	return ret;
>> }
>>
>> -static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>> +static struct nlattr *find_dump_kind(struct nlattr **nla)
>> {
>> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
>> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>> -	struct nlattr *nla[TCAA_MAX + 1];
>> 	struct nlattr *kind;
>>
>> -	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>> -			NULL, NULL) < 0)
>> -		return NULL;
>> 	tb1 = nla[TCA_ACT_TAB];
>> 	if (tb1 == NULL)
>> 		return NULL;
>> @@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>> 	return kind;
>> }
>>
>> +static inline bool tca_flags_valid(u32 act_flags)
>> +{
>> +	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
>> +
>> +	if (act_flags & invalid_flags_mask)
>> +		return false;
>
> I don't see how this resolves anything. VALID_TCA_FLAGS is set in stone
> not going to change anytime in future, right?

Every time a new bit gets added VALID_TCA_FLAGS changes.

>Then the TCA_ROOT_FLAGS
> attr will always contain only one flag, right?

Not true - forever. Just in this patch discussion:
I have added 2 other flags since removed. So I cant predict the
future. You keep coming back to this assumption there will always
ever be this one flag. I am not following how you reach this
conclusion.

> Then, I don't see why do we need this dance... u8 flag attr resolves
> this in elegant way. I know I sound like a broken record. This is the
> last time I'm commenting on this. I give up.
>

Why is a u8 better than u32 Jiri?
The TLV space consumed is still 64 bits in both cases. If I define u8,
u16, u32 - it is still 64 bits being allocated. If I use a u8 then I have
24 bits which are pads - given current discussions I can never ever use
again!

If i keep 32 bits I am free to use those without ever changing the
data structure (except for the restrictions to now make sure nothing
gets set).

cheers,
jamal

^ permalink raw reply

* RE: [PATCH net] driver/net: Fix possible memleaks when fail to register_netdevice
From: Gao Feng @ 2017-04-25 12:51 UTC (permalink / raw)
  To: gfree.wind, jiri, davem, kuznet, jmorris, yoshfuji, kaber,
	steffen.klassert, herbert, netdev
In-Reply-To: <1493121710-27910-1-git-send-email-gfree.wind@foxmail.com>

> From: Gao Feng <fgao@ikuai8.com>
> 
> These drivers allocate kinds of resources in init routine, and free some
> resources in the destructor of net_device. It may cause memleak when some
> errors happen after register_netdevice invokes the init callback. Because only
> the uninit callback is invoked in the error handler of register_netdevice, but the
> destructor not. As a result, some resources are lost forever.
> 
> Now invokes the destructor instead of free_netdev somewhere, and free the
> left resources in the newlink func when fail to register_netdevice.
> 
> Signed-off-by: Gao Feng <fgao@ikuai8.com>
> ---
>  drivers/net/dummy.c      |  2 +-
>  drivers/net/ifb.c        |  2 +-
>  drivers/net/loopback.c   |  2 +-
>  drivers/net/team/team.c  | 11 ++++++++++-
>  drivers/net/veth.c       |  4 ++--
>  net/8021q/vlan_netlink.c |  6 +++++-
>  net/ipv4/ip_tunnel.c     |  9 ++++++++-
>  net/ipv6/ip6_gre.c       |  6 +++++-
>  net/ipv6/ip6_tunnel.c    | 12 ++++++++++--
>  net/ipv6/ip6_vti.c       |  7 ++++++-
>  net/ipv6/sit.c           |  5 ++++-
>  11 files changed, 53 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index
> 2c80611..55b8a50 100644
> --- a/drivers/net/dummy.c
> +++ b/drivers/net/dummy.c
> @@ -383,7 +383,7 @@ static int __init dummy_init_one(void)
>  	return 0;
> 
>  err:
> -	free_netdev(dev_dummy);
> +	dummy_free_netdev(dev_dummy);
>  	return err;
>  }
> 
> diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c index 312fce7..a298371
100644
> --- a/drivers/net/ifb.c
> +++ b/drivers/net/ifb.c
> @@ -318,7 +318,7 @@ static int __init ifb_init_one(int index)
>  	return 0;
> 
>  err:
> -	free_netdev(dev_ifb);
> +	ifb_dev_free(dev_ifb);
>  	return err;
>  }
> 
> diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index
> b23b719..c4e1d4c 100644
> --- a/drivers/net/loopback.c
> +++ b/drivers/net/loopback.c
> @@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net
*net)
> 
> 
>  out_free_netdev:
> -	free_netdev(dev);
> +	loopback_dev_free(dev);
>  out:
>  	if (net_eq(net, &init_net))
>  		panic("loopback: Failed to register netdevice: %d\n", err);
diff --git
> a/drivers/net/team/team.c b/drivers/net/team/team.c index f8c81f1..0bc80fb
> 100644
> --- a/drivers/net/team/team.c
> +++ b/drivers/net/team/team.c
> @@ -2109,10 +2109,19 @@ static void team_setup(struct net_device *dev)
> static int team_newlink(struct net *src_net, struct net_device *dev,
>  			struct nlattr *tb[], struct nlattr *data[])  {
> +	int ret;
> +
>  	if (tb[IFLA_ADDRESS] == NULL)
>  		eth_hw_addr_random(dev);
> 
> -	return register_netdevice(dev);
> +	ret = register_netdevice(dev);
> +	if (ret) {
> +		struct team *team = netdev_priv(dev);
> +
> +		free_percpu(team->pcpu_stats);
> +	}
> +
> +	return ret;
>  }
> 
>  static int team_validate(struct nlattr *tb[], struct nlattr *data[]) diff
--git
> a/drivers/net/veth.c b/drivers/net/veth.c index 8c39d6d..f60f5ee 100644
> --- a/drivers/net/veth.c
> +++ b/drivers/net/veth.c
> @@ -457,13 +457,13 @@ static int veth_newlink(struct net *src_net, struct
> net_device *dev,
>  	return 0;
> 
>  err_register_dev:
> -	/* nothing to do */
> +	free_percpu(dev->vstats);
>  err_configure_peer:
>  	unregister_netdevice(peer);
>  	return err;
> 
>  err_register_peer:
> -	free_netdev(peer);
> +	veth_dev_free(peer);
>  	return err;
>  }
> 
> diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index
> 1270207..a15826a 100644
> --- a/net/8021q/vlan_netlink.c
> +++ b/net/8021q/vlan_netlink.c
> @@ -156,7 +156,11 @@ static int vlan_newlink(struct net *src_net, struct
> net_device *dev,
>  	if (err < 0)
>  		return err;
> 
> -	return register_vlan_dev(dev);
> +	err = register_vlan_dev(dev);
> +	if (err)
> +		free_percpu(vlan->vlan_pcpu_stats);
> +
> +	return err;
>  }
> 
>  static inline size_t vlan_qos_map_size(unsigned int n) diff --git
> a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 823abae..4acb296
> 100644
> --- a/net/ipv4/ip_tunnel.c
> +++ b/net/ipv4/ip_tunnel.c
> @@ -63,6 +63,8 @@
>  #include <net/ip6_route.h>
>  #endif
> 
> +static void ip_tunnel_dev_free(struct net_device *dev);
> +
>  static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)  {
>  	return hash_32((__force u32)key ^ (__force u32)remote, @@ -285,7
> +287,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
>  	return dev;
> 
>  failed_free:
> -	free_netdev(dev);
> +	ip_tunnel_dev_free(dev);
>  failed:
>  	return ERR_PTR(err);
>  }
> @@ -1099,7 +1101,12 @@ int ip_tunnel_newlink(struct net_device *dev,
> struct nlattr *tb[],
>  		dev->mtu = mtu;
> 
>  	ip_tunnel_add(itn, nt);
> +
> +	return 0;
>  out:
> +	gro_cells_destroy(&nt->gro_cells);
> +	dst_cache_destroy(&nt->dst_cache);
> +	free_percpu(dev->tstats);
>  	return err;
>  }
>  EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
> diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index
6fcb7cb..d409ad1
> 100644
> --- a/net/ipv6/ip6_gre.c
> +++ b/net/ipv6/ip6_gre.c
> @@ -77,6 +77,7 @@ struct ip6gre_net {
>  static void ip6gre_tunnel_setup(struct net_device *dev);  static void
> ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);  static
void
> ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
> +static void ip6gre_dev_free(struct net_device *dev);
> 
>  /* Tunnel hash table */
> 
> @@ -351,7 +352,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net
> *net,
>  	return nt;
> 
>  failed_free:
> -	free_netdev(dev);
> +	ip6gre_dev_free(dev);
>  	return NULL;
>  }
> 
> @@ -1388,7 +1389,10 @@ static int ip6gre_newlink(struct net *src_net,
struct
> net_device *dev,
>  	dev_hold(dev);
>  	ip6gre_tunnel_link(ign, nt);
> 
> +	return 0;
>  out:
> +	dst_cache_destroy(&nt->dst_cache);
> +	free_percpu(dev->tstats);
>  	return err;
>  }
> 
> diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index
> 75fac93..95f512c 100644
> --- a/net/ipv6/ip6_tunnel.c
> +++ b/net/ipv6/ip6_tunnel.c
> @@ -1960,11 +1960,12 @@ static int ip6_tnl_newlink(struct net *src_net,
> struct net_device *dev,
>  	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
>  	struct ip6_tnl *nt, *t;
>  	struct ip_tunnel_encap ipencap;
> +	int err;
> 
>  	nt = netdev_priv(dev);
> 
>  	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
> -		int err = ip6_tnl_encap_setup(nt, &ipencap);
> +		err = ip6_tnl_encap_setup(nt, &ipencap);
> 
>  		if (err < 0)
>  			return err;
> @@ -1981,7 +1982,14 @@ static int ip6_tnl_newlink(struct net *src_net,
> struct net_device *dev,
>  			return -EEXIST;
>  	}
> 
> -	return ip6_tnl_create2(dev);
> +	err = ip6_tnl_create2(dev);
> +	if (err) {
> +		gro_cells_destroy(&t->gro_cells);
> +		dst_cache_destroy(&t->dst_cache);
> +		free_percpu(dev->tstats);
> +	}
> +
> +	return err;
>  }
> 
>  static int ip6_tnl_changelink(struct net_device *dev, struct nlattr
*tb[], diff
> --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 3d8a3b6..b201eef
100644
> --- a/net/ipv6/ip6_vti.c
> +++ b/net/ipv6/ip6_vti.c
> @@ -940,6 +940,7 @@ static int vti6_newlink(struct net *src_net, struct
> net_device *dev,  {
>  	struct net *net = dev_net(dev);
>  	struct ip6_tnl *nt;
> +	int ret;
> 
>  	nt = netdev_priv(dev);
>  	vti6_netlink_parms(data, &nt->parms);
> @@ -949,7 +950,11 @@ static int vti6_newlink(struct net *src_net, struct
> net_device *dev,
>  	if (vti6_locate(net, &nt->parms, 0))
>  		return -EEXIST;
> 
> -	return vti6_tnl_create2(dev);
> +	ret = vti6_tnl_create2(dev);
> +	if (ret)
> +		free_percpu(dev->tstats);
> +
> +	return ret;
>  }
> 
>  static void vti6_dellink(struct net_device *dev, struct list_head *head)
diff
> --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 99853c6..f45dc4a 100644
> --- a/net/ipv6/sit.c
> +++ b/net/ipv6/sit.c
> @@ -1555,8 +1555,11 @@ static int ipip6_newlink(struct net *src_net,
struct
> net_device *dev,
>  		return -EEXIST;
> 
>  	err = ipip6_tunnel_create(dev);
> -	if (err < 0)
> +	if (err < 0) {
> +		dst_cache_destroy(&nt->dst_cache);
> +		free_percpu(dev->tstats);
>  		return err;
> +	}
> 
>  #ifdef CONFIG_IPV6_SIT_6RD
>  	if (ipip6_netlink_6rd_parms(data, &ip6rd))
> --
> 1.9.1

Actually I have another, simpler solution to fix it.
When newlink fails, its caller "rtnl_newlink" could invoke the destructor, if
it exists, like the following:
	if (dev->reg_state == NETREG_UNINITIALIZED)
		if (dev->destructor)
			dev->destructor(dev);
		else
			free_netdev(dev);

There are two reasons I don't adopt this solution.
1. I don't know if it is against the original purpose of dev->destructor and
rtnl_newlink.
2. It breaks the design rule that "who malloc, who free".

But it is a simpler fix, so what's your opinion?

Best Regards
Feng

^ permalink raw reply

* Re: [PATCH v3 net] net: ipv6: regenerate host route if moved to gc list
From: Andrey Konovalov @ 2017-04-25 12:50 UTC (permalink / raw)
  To: David Ahern; +Cc: netdev, Dmitry Vyukov, mmanning, Martin KaFai Lau
In-Reply-To: <1493046549-17420-1-git-send-email-dsa@cumulusnetworks.com>

On Mon, Apr 24, 2017 at 5:09 PM, David Ahern <dsa@cumulusnetworks.com> wrote:
> Taking down the loopback device wreaks havoc on IPv6 routing. By
> extension, taking down a VRF device wreaks havoc on its table.
>
> Dmitry and Andrey both reported heap out-of-bounds reports in the IPv6
> FIB code while running syzkaller fuzzer. The root cause is a dead dst
> that is on the garbage list gets reinserted into the IPv6 FIB. While on
> the gc (or perhaps when it gets added to the gc list) the dst->next is
> set to an IPv4 dst. A subsequent walk of the ipv6 tables causes the
> out-of-bounds access.
>
> Andrey's reproducer was the key to getting to the bottom of this.
>
> With IPv6, host routes for an address have the dst->dev set to the
> loopback device. When the 'lo' device is taken down, rt6_ifdown initiates
> a walk of the fib evicting routes with the 'lo' device which means all
> host routes are removed. That process moves the dst which is attached to
> an inet6_ifaddr to the gc list and marks it as dead.
>
> The recent change to keep global IPv6 addresses added a new function,
> fixup_permanent_addr, that is called on admin up. That function restarts
> dad for an inet6_ifaddr and when it completes the host route attached
> to it is inserted into the fib. Since the route was marked dead and
> moved to the gc list, re-inserting the route causes the reported
> out-of-bounds accesses. If the device with the address is taken down
> or the address is removed, the WARN_ON in fib6_del is triggered.
>
> All of those faults are fixed by regenerating the host route if the
> existing one has been moved to the gc list, something that can be
> determined by checking if the rt6i_ref counter is 0.

Hi David,

I've been running syzkaller with your patch and got another report
from ip6_pol_route.

It happened only once so far and I couldn't reproduce it.

==================================================================
BUG: KASAN: slab-out-of-bounds in find_rr_leaf net/ipv6/route.c:722
[inline] at addr ffff880033dddaa8
BUG: KASAN: slab-out-of-bounds in rt6_select net/ipv6/route.c:758
[inline] at addr ffff880033dddaa8
BUG: KASAN: slab-out-of-bounds in ip6_pol_route+0x1b1e/0x1ba0
net/ipv6/route.c:1091 at addr ffff880033dddaa8
Read of size 4 by task syz-executor7/9808
CPU: 2 PID: 9808 Comm: syz-executor7 Not tainted 4.11.0-rc8+ #268
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:16 [inline]
 dump_stack+0x192/0x22d lib/dump_stack.c:52
 kasan_object_err+0x1c/0x70 mm/kasan/report.c:164
 print_address_description mm/kasan/report.c:202 [inline]
 kasan_report_error mm/kasan/report.c:291 [inline]
 kasan_report+0x252/0x510 mm/kasan/report.c:347
 __asan_report_load4_noabort+0x14/0x20 mm/kasan/report.c:367
 find_rr_leaf net/ipv6/route.c:722 [inline]
 rt6_select net/ipv6/route.c:758 [inline]
 ip6_pol_route+0x1b1e/0x1ba0 net/ipv6/route.c:1091
 ip6_pol_route_output+0x4c/0x60 net/ipv6/route.c:1212
 fib6_rule_action+0x261/0x8a0 net/ipv6/fib6_rules.c:100
 fib_rules_lookup+0x34b/0xae0 net/core/fib_rules.c:265
 fib6_rule_lookup+0x175/0x360 net/ipv6/fib6_rules.c:44
 ip6_route_output_flags+0x260/0x2f0 net/ipv6/route.c:1240
 ip6_route_output include/net/ip6_route.h:79 [inline]
 ip6_dst_lookup_tail+0xe59/0x1640 net/ipv6/ip6_output.c:959
 ip6_dst_lookup_flow+0xb1/0x260 net/ipv6/ip6_output.c:1082
 ip6_sk_dst_lookup_flow+0x2c6/0x7f0 net/ipv6/ip6_output.c:1113
 udpv6_sendmsg+0x2350/0x3310 net/ipv6/udp.c:1219
 inet_sendmsg+0x164/0x490 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1a/0xa9
RIP: 0033:0x4458d9
RSP: 002b:00007ff3a5343b58 EFLAGS: 00000282 ORIG_RAX: 000000000000002c
RAX: ffffffffffffffda RBX: 0000000000708000 RCX: 00000000004458d9
RDX: 0000000000000fa8 RSI: 0000000020000000 RDI: 0000000000000005
RBP: 0000000000004300 R08: 0000000020006000 R09: 000000000000001c
R10: 0000000000040000 R11: 0000000000000282 R12: 00000000006e33c0
R13: 0000000000000005 R14: 0000000000000029 R15: 0000000000000023
Object at ffff880033ddd940, in cache ip_dst_cache size: 224
Allocated:
PID = 9717
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:616
 kasan_slab_alloc+0x12/0x20 mm/kasan/kasan.c:555
 slab_post_alloc_hook mm/slab.h:456 [inline]
 slab_alloc_node mm/slub.c:2718 [inline]
 slab_alloc mm/slub.c:2726 [inline]
 kmem_cache_alloc+0xa8/0x160 mm/slub.c:2731
 dst_alloc+0x11b/0x1a0 net/core/dst.c:209
 rt_dst_alloc+0xf0/0x5d0 net/ipv4/route.c:1497
 __mkroute_output net/ipv4/route.c:2181 [inline]
 __ip_route_output_key_hash+0xdc4/0x2930 net/ipv4/route.c:2391
 __ip_route_output_key include/net/route.h:123 [inline]
 ip_route_output_flow+0x29/0xa0 net/ipv4/route.c:2477
 ip_route_output_key include/net/route.h:133 [inline]
 sctp_v4_get_dst+0x606/0x1420 net/sctp/protocol.c:458
 sctp_transport_route+0xa8/0x420 net/sctp/transport.c:287
 sctp_assoc_add_peer+0x5a5/0x1470 net/sctp/associola.c:656
 sctp_sendmsg+0x18a8/0x3b50 net/sctp/socket.c:1871
 inet_sendmsg+0x164/0x490 net/ipv4/af_inet.c:762
 sock_sendmsg_nosec net/socket.c:633 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:643
 SYSC_sendto+0x660/0x810 net/socket.c:1696
 SyS_sendto+0x40/0x50 net/socket.c:1664
 entry_SYSCALL_64_fastpath+0x1a/0xa9
Freed:
PID = 868
 save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
 save_stack+0x43/0xd0 mm/kasan/kasan.c:513
 set_track mm/kasan/kasan.c:525 [inline]
 kasan_slab_free+0x73/0xc0 mm/kasan/kasan.c:589
 slab_free_hook mm/slub.c:1357 [inline]
 slab_free_freelist_hook mm/slub.c:1379 [inline]
 slab_free mm/slub.c:2961 [inline]
 kmem_cache_free+0x72/0x190 mm/slub.c:2983
 dst_destroy+0x24c/0x390 net/core/dst.c:269
 dst_free include/net/dst.h:428 [inline]
 rt_fibinfo_free net/ipv4/fib_semantics.c:155 [inline]
 free_fib_info_rcu+0x852/0xa10 net/ipv4/fib_semantics.c:214
 __rcu_reclaim kernel/rcu/rcu.h:118 [inline]
 rcu_do_batch.isra.65+0x6de/0xbd0 kernel/rcu/tree.c:2879
 invoke_rcu_callbacks kernel/rcu/tree.c:3142 [inline]
 __rcu_process_callbacks kernel/rcu/tree.c:3109 [inline]
 rcu_process_callbacks+0x23f/0x810 kernel/rcu/tree.c:3126
 __do_softirq+0x253/0x78b kernel/softirq.c:284
Memory state around the buggy address:
 ffff880033ddd980: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 ffff880033ddda00: 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc fc
>ffff880033ddda80: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
                                  ^
 ffff880033dddb00: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff880033dddb80: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
==================================================================


>
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Reported-by: Dmitry Vyukov <dvyukov@google.com>
> Reported-by: Andrey Konovalov <andreyknvl@google.com>
> Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
> ---
> v3
> - removed 'if (prev)' and just call ip6_rt_put; added comment about spinlock
>
> v2
> - change ifp->rt under spinlock vs cmpxchg
> - add comment about rt6i_ref == 0
>
>  net/ipv6/addrconf.c | 15 +++++++++++++--
>  1 file changed, 13 insertions(+), 2 deletions(-)
>
> diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
> index 80ce478c4851..93f81d9cd85f 100644
> --- a/net/ipv6/addrconf.c
> +++ b/net/ipv6/addrconf.c
> @@ -3271,14 +3271,25 @@ static void addrconf_gre_config(struct net_device *dev)
>  static int fixup_permanent_addr(struct inet6_dev *idev,
>                                 struct inet6_ifaddr *ifp)
>  {
> -       if (!ifp->rt) {
> -               struct rt6_info *rt;
> +       /* rt6i_ref == 0 means the host route was removed from the
> +        * FIB, for example, if 'lo' device is taken down. In that
> +        * case regenerate the host route.
> +        */
> +       if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) {
> +               struct rt6_info *rt, *prev;
>
>                 rt = addrconf_dst_alloc(idev, &ifp->addr, false);
>                 if (unlikely(IS_ERR(rt)))
>                         return PTR_ERR(rt);
>
> +               prev = ifp->rt;
> +
> +               /* ifp->rt can be accessed outside of rtnl */
> +               spin_lock(&ifp->lock);
>                 ifp->rt = rt;
> +               spin_unlock(&ifp->lock);
> +
> +               ip6_rt_put(prev);
>         }
>
>         if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) {
> --
> 2.1.4
>

^ permalink raw reply

* Re: [PATCH net-next 0/2] flower: add MPLS matching support
From: Jamal Hadi Salim @ 2017-04-25 12:47 UTC (permalink / raw)
  To: Simon Horman, Jakub Kicinski
  Cc: David Miller, benjamin.lahaise, netdev, bcrl, Jiri Pirko
In-Reply-To: <20170425115459.GA23939@vergenet.net>

On 17-04-25 07:55 AM, Simon Horman wrote:
[..]
>
> I agree something should be done wrt BOS. If the LABEL and TC are to
> be left as-is then I think a similar treatment of BOS - that is masking it
> - makes sense.
>
> I also agree with statements made earlier in the thread that it is unlikely
> that the unused bits of these attributes will be used - as opposed to a
> bitmask of flag values which seems ripe for re-use for future flags.
>

For your use case, I think you are fine if you just do the mask in the
kernel. A mask  to a user value implies "I am ignoring the rest
of these bits - I dont  care if you set them "

> I would like to add to the discussion that I think in future it would
> be good to expand the features provided by this patch to support supplying
> a mask as part of the match - as flower supports for other fields such
> as IP addresses. But I think the current scheme of masking out invalid bits
> should also work in conjunction with user-supplied masks.
>

The challenge we have right now is "users do stoopid or malicious
things". So are you going to accept the wrong bitmap + mask?

cheers,
jamal

^ permalink raw reply

* [PATCH net] sfc: tx ring can only have 2048 entries for all EF10 NICs
From: Bert Kenward @ 2017-04-25 12:44 UTC (permalink / raw)
  To: Dave Miller; +Cc: netdev, linux-net-drivers, Patrick Talbert

Fixes: dd248f1bc65b ("sfc: Add PCI ID for Solarflare 8000 series 10/40G NIC")
Reported-by: Patrick Talbert <ptalbert@redhat.com>
Signed-off-by: Bert Kenward <bkenward@solarflare.com>
---
 drivers/net/ethernet/sfc/efx.h         | 5 ++++-
 drivers/net/ethernet/sfc/workarounds.h | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h
index ee14662415c5..a0c52e328102 100644
--- a/drivers/net/ethernet/sfc/efx.h
+++ b/drivers/net/ethernet/sfc/efx.h
@@ -74,7 +74,10 @@ void efx_schedule_slow_fill(struct efx_rx_queue *rx_queue);
 #define EFX_RXQ_MIN_ENT		128U
 #define EFX_TXQ_MIN_ENT(efx)	(2 * efx_tx_max_skb_descs(efx))
 
-#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_35388(efx) ? \
+/* All EF10 architecture NICs steal one bit of the DMAQ size for various
+ * other purposes when counting TxQ entries, so we halve the queue size.
+ */
+#define EFX_TXQ_MAX_ENT(efx)	(EFX_WORKAROUND_EF10(efx) ? \
 				 EFX_MAX_DMAQ_SIZE / 2 : EFX_MAX_DMAQ_SIZE)
 
 static inline bool efx_rss_enabled(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 103f827a1623..c67fa18b8121 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -16,6 +16,7 @@
  */
 
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
+#define EFX_WORKAROUND_EF10(efx) (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */
-- 
2.7.4

^ permalink raw reply related

* Re: [RFC 3/4] nfp: make use of extended ack message reporting
From: Jamal Hadi Salim @ 2017-04-25 12:42 UTC (permalink / raw)
  To: Jakub Kicinski, netdev
  Cc: davem, johannes, dsa, daniel, alexei.starovoitov, bblanco,
	john.fastabend, kubakici, oss-drivers
In-Reply-To: <20170425080644.122536-4-jakub.kicinski@netronome.com>

Good stuff. Question below:

On 17-04-25 04:06 AM, Jakub Kicinski wrote:
> Try to carry error messages to the user via the netlink extended

[..]
> +nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp,
> +		     struct netlink_ext_ack *extack)
>  {
>  	/* XDP-enabled tests */
>  	if (!dp->xdp_prog)
>  		return 0;
>  	if (dp->fl_bufsz > PAGE_SIZE) {
> -		nn_warn(nn, "MTU too large w/ XDP enabled\n");
> +		NL_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled");

So are we going to standardize these strings?
i.e what if some user has written a bash script that depends on this
string and it gets changed later.

cheers,
jamal

^ permalink raw reply

* Re: net/ipv6: slab-out-of-bounds in ip6_tnl_xmit
From: Andrey Konovalov @ 2017-04-25 12:32 UTC (permalink / raw)
  To: Cong Wang
  Cc: David S. Miller, Alexey Kuznetsov, James Morris,
	Hideaki YOSHIFUJI, Patrick McHardy, netdev, LKML, Eric Dumazet,
	Dmitry Vyukov, Kostya Serebryany, syzkaller
In-Reply-To: <CAM_iQpWnPgn0+-y34nGTB0byonLWCdf3_VAKcH8tK5fdzF4CBg@mail.gmail.com>

On Tue, Apr 25, 2017 at 7:04 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
> On Mon, Apr 24, 2017 at 9:47 AM, Cong Wang <xiyou.wangcong@gmail.com> wrote:
>>
>> We use ipv4 dst in ip6_tunnel and cast an IPv4 neigh key as an
>> IPv6 address...
>>
>>
>>                 neigh = dst_neigh_lookup(skb_dst(skb),
>>                                          &ipv6_hdr(skb)->daddr);
>>                 if (!neigh)
>>                         goto tx_err_link_failure;
>>
>>                 addr6 = (struct in6_addr *)&neigh->primary_key; // <=== HERE
>>                 addr_type = ipv6_addr_type(addr6);
>>
>>                 if (addr_type == IPV6_ADDR_ANY)
>>                         addr6 = &ipv6_hdr(skb)->daddr;
>>
>>                 memcpy(&fl6->daddr, addr6, sizeof(fl6->daddr));
>>
>> Also the network header of the skb at this point should be still IPv4?
>
> Please try the attached patch.

I don't see these crashes with your patch.

Thanks!

Tested-by: Andrey Konovalov <andreyknvl@google.com>

>
> I am not sure how we could handle 4in6 case better than just relying on
> the config of ip6 tunnel.

^ permalink raw reply

* net: pcnet32: [BUG] IRQ disabled when resizing ring buffer
From: Corentin Labbe @ 2017-04-25 12:25 UTC (permalink / raw)
  To: pcnet32; +Cc: netdev, linux-kernel

Hello

When resizing ring buffer on qemu/pcnet32 with linux-next and CONFIG_DMA_API_DEBUG=y, I got the following trace:
[8862.793779] ------------[ cut here ]------------
[ 8862.793784] WARNING: CPU: 0 PID: 26592 at /linux-next/include/linux/dma-mapping.h:505 pcnet32_set_ringparam+0xd55/0xda0
[ 8862.793785] Modules linked in:
[ 8862.793788] CPU: 0 PID: 26592 Comm: ethtool Tainted: G        W       4.11.0-rc7-next-20170424+ #68
[ 8862.793789] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.10.1-0-g8891697-prebuilt.qemu-project.org 04/01/2014
[ 8862.793790] task: ffff880235390000 task.stack: ffff8801db300000
[ 8862.793792] RIP: 0010:pcnet32_set_ringparam+0xd55/0xda0
[ 8862.793794] RSP: 0018:ffff8801db303be0 EFLAGS: 00010046
[ 8862.793795] RAX: 0000000000000086 RBX: ffff880234d8d000 RCX: 0000000180800052
[ 8862.793797] RDX: 0000000000000004 RSI: ffffea00070b7700 RDI: ffff880236803a40
[ 8862.793798] RBP: ffff8801db303ca0 R08: 0000000000000001 R09: ffff880236a138a0
[ 8862.793799] R10: 00000000bb83e000 R11: ffff8800bb83e000 R12: 0000000000000004
[ 8862.793800] R13: 0000000000000010 R14: ffff8801db303cdc R15: ffffffff81807300
[ 8862.793803] FS:  00007f55edcc6700(0000) GS:ffff88023fc00000(0000) knlGS:0000000000000000
[ 8862.793804] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 8862.793805] CR2: 00007f55ed4eebe0 CR3: 00000001db2e0000 CR4: 00000000000006f0
[ 8862.793808] Call Trace:
[ 8862.793811]  ? get_page_from_freelist+0x263/0x930
[ 8862.793814]  dev_ethtool+0xde3/0x20b0
[ 8862.793816]  ? __alloc_pages_nodemask+0xd9/0xd70
[ 8862.793818]  ? __rtnl_unlock+0x25/0x60
[ 8862.793821]  ? netdev_run_todo+0x5c/0x320
[ 8862.793823]  ? __might_sleep+0x50/0x90
[ 8862.793824]  ? _cond_resched+0x14/0x30
[ 8862.793826]  dev_ioctl+0x18c/0x620
[ 8862.793828]  ? _raw_spin_unlock+0x9/0x20
[ 8862.793830]  sock_do_ioctl+0x4f/0x60
[ 8862.793832]  sock_ioctl+0x247/0x320
[ 8862.793834]  do_vfs_ioctl+0xaa/0x720
[ 8862.793836]  ? __do_page_fault+0x1d1/0x410
[ 8862.793838]  SyS_ioctl+0x47/0x80
[ 8862.793840]  entry_SYSCALL_64_fastpath+0x1a/0xa5
[ 8862.793842] RIP: 0033:0x7f55ed4eebe7
[ 8862.793843] RSP: 002b:00007ffd0b76bef8 EFLAGS: 00000246 ORIG_RAX: 0000000000000010
[ 8862.793845] RAX: ffffffffffffffda RBX: 00007ffd0b76c050 RCX: 00007f55ed4eebe7
[ 8862.793846] RDX: 00007ffd0b76c060 RSI: 0000000000008946 RDI: 0000000000000003
[ 8862.793847] RBP: 00007ffd0b76bf20 R08: 000000000000000a R09: 0000000000000000
[ 8862.793848] R10: 000000000000053e R11: 0000000000000246 R12: 00007ffd0b76c060
[ 8862.793848] R13: 00007ffd0b76c1d0 R14: 0000000000420e6f R15: 0000000000000002
[ 8862.793850] Code: 25 19 e1 6f 00 4d 85 e4 0f 85 34 fd ff ff eb 95 48 c7 c2 20 ea 8a 81 48 c7 c6 32 a6 9c 81 48 89 df e8 80 a7 18 00 e9 33 ff ff ff <0f> ff e9 a2 f7 ff ff 0f ff 66 90 e9    2d f6 ff ff 0f ff 66 0f 1f
[ 8862.793881] ---[ end trace ecb86a2cf3a149f7 ]---

Regards
Corentin Labbe

^ permalink raw reply

* [PATCH 3/3] net: can: usb: gs_usb: Fix buffer on stack
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev
  Cc: davem, linux-can, kernel, Maksim Salau, linux-stable # >= v4.8,
	Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Maksim Salau <maksim.salau@gmail.com>

Allocate buffers on HEAP instead of STACK for local structures
that are to be sent using usb_control_msg().

Signed-off-by: Maksim Salau <maksim.salau@gmail.com>
Cc: linux-stable <stable@vger.kernel.org> # >= v4.8
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/gs_usb.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c
index 300349fe8dc0..eecee7f8dfb7 100644
--- a/drivers/net/can/usb/gs_usb.c
+++ b/drivers/net/can/usb/gs_usb.c
@@ -739,13 +739,18 @@ static const struct net_device_ops gs_usb_netdev_ops = {
 static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 {
 	struct gs_can *dev = netdev_priv(netdev);
-	struct gs_identify_mode imode;
+	struct gs_identify_mode *imode;
 	int rc;
 
+	imode = kmalloc(sizeof(*imode), GFP_KERNEL);
+
+	if (!imode)
+		return -ENOMEM;
+
 	if (do_identify)
-		imode.mode = GS_CAN_IDENTIFY_ON;
+		imode->mode = GS_CAN_IDENTIFY_ON;
 	else
-		imode.mode = GS_CAN_IDENTIFY_OFF;
+		imode->mode = GS_CAN_IDENTIFY_OFF;
 
 	rc = usb_control_msg(interface_to_usbdev(dev->iface),
 			     usb_sndctrlpipe(interface_to_usbdev(dev->iface),
@@ -755,10 +760,12 @@ static int gs_usb_set_identify(struct net_device *netdev, bool do_identify)
 			     USB_RECIP_INTERFACE,
 			     dev->channel,
 			     0,
-			     &imode,
-			     sizeof(imode),
+			     imode,
+			     sizeof(*imode),
 			     100);
 
+	kfree(imode);
+
 	return (rc > 0) ? 0 : rc;
 }
 
-- 
2.11.0

^ permalink raw reply related

* pull-request: can 2017-04-25
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel

Hello David,

this is a pull request of three patches for net/master.

There are two patches by Stephane Grosjean that add a new variant to the
PCAN-Chip USB driver. The other patch is by Maksim Salau, which switches the
memory for USB transfers from stack to heap.

regards,
Marc

---

The following changes since commit 38a98bceaf5f786b931d16826fbb46e73280849b:

  Merge branch 'dsa-b53-58xx-fixes' (2017-04-24 18:29:11 -0400)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/mkl/linux-can.git tags/linux-can-fixes-for-4.11-20170425

for you to fetch changes up to b05c73bd1e3ec60357580eb042ee932a5ed754d5:

  net: can: usb: gs_usb: Fix buffer on stack (2017-04-25 14:08:35 +0200)

----------------------------------------------------------------
linux-can-fixes-for-4.11-20170425

----------------------------------------------------------------
Maksim Salau (1):
      net: can: usb: gs_usb: Fix buffer on stack

Stephane Grosjean (2):
      can: usb: Add support of PCAN-Chip USB stamp module
      can: usb: Kconfig: Add PCAN-USB X6 device in help text

 drivers/net/can/usb/Kconfig                  |  2 +
 drivers/net/can/usb/gs_usb.c                 | 17 +++++--
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 72 ++++++++++++++++++++++++++++
 5 files changed, 90 insertions(+), 5 deletions(-)

^ permalink raw reply

* [PATCH 2/3] can: usb: Kconfig: Add PCAN-USB X6 device in help text
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel, Stephane Grosjean, Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Stephane Grosjean <s.grosjean@peak-system.com>

This patch adds a text line in the help section of the CAN_PEAK_USB
config item describing the support of the PCAN-USB X6 adapter, which is
already included in the Kernel since 4.9.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 3f8adc366af4..5f9e0e6301d0 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -73,6 +73,7 @@ config CAN_PEAK_USB
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
 	  PCAN-Chip USB        CAN-FD to USB stamp module
+	  PCAN-USB X6          6 CAN-FD channels USB adapter
 
 	  (see also http://www.peak-system.com).
 
-- 
2.11.0


^ permalink raw reply related

* [PATCH 1/3] can: usb: Add support of PCAN-Chip USB stamp module
From: Marc Kleine-Budde @ 2017-04-25 12:16 UTC (permalink / raw)
  To: netdev; +Cc: davem, linux-can, kernel, Stephane Grosjean, Marc Kleine-Budde
In-Reply-To: <20170425121645.9946-1-mkl@pengutronix.de>

From: Stephane Grosjean <s.grosjean@peak-system.com>

This patch adds the support of the PCAN-Chip USB, a stamp module for
customer hardware designs, which communicates via USB 2.0 with the
hardware. The integrated CAN controller supports the protocols CAN 2.0 A/B
as well as CAN FD. The physical CAN connection is determined by external
wiring. The Stamp module with its single-sided mounting and plated
half-holes is suitable for automatic assembly.

Note that the chip is equipped with the same logic as the PCAN-USB FD.

Signed-off-by: Stephane Grosjean <s.grosjean@peak-system.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/usb/Kconfig                  |  1 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.c |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_core.h |  2 +
 drivers/net/can/usb/peak_usb/pcan_usb_fd.c   | 72 ++++++++++++++++++++++++++++
 4 files changed, 77 insertions(+)

diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig
index 8483a40e7e9e..3f8adc366af4 100644
--- a/drivers/net/can/usb/Kconfig
+++ b/drivers/net/can/usb/Kconfig
@@ -72,6 +72,7 @@ config CAN_PEAK_USB
 	  PCAN-USB Pro         dual CAN 2.0b channels USB adapter
 	  PCAN-USB FD          single CAN-FD channel USB adapter
 	  PCAN-USB Pro FD      dual CAN-FD channels USB adapter
+	  PCAN-Chip USB        CAN-FD to USB stamp module
 
 	  (see also http://www.peak-system.com).
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
index 0b0302af3bd2..57913dbbae0a 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c
@@ -39,6 +39,7 @@ static struct usb_device_id peak_usb_table[] = {
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPRO_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBFD_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBPROFD_PRODUCT_ID)},
+	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBCHIP_PRODUCT_ID)},
 	{USB_DEVICE(PCAN_USB_VENDOR_ID, PCAN_USBX6_PRODUCT_ID)},
 	{} /* Terminating entry */
 };
@@ -51,6 +52,7 @@ static const struct peak_usb_adapter *const peak_usb_adapters_list[] = {
 	&pcan_usb_pro,
 	&pcan_usb_fd,
 	&pcan_usb_pro_fd,
+	&pcan_usb_chip,
 	&pcan_usb_x6,
 };
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.h b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
index 3cbfb069893d..c01316cac354 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_core.h
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.h
@@ -27,6 +27,7 @@
 #define PCAN_USBPRO_PRODUCT_ID		0x000d
 #define PCAN_USBPROFD_PRODUCT_ID	0x0011
 #define PCAN_USBFD_PRODUCT_ID		0x0012
+#define PCAN_USBCHIP_PRODUCT_ID		0x0013
 #define PCAN_USBX6_PRODUCT_ID		0x0014
 
 #define PCAN_USB_DRIVER_NAME		"peak_usb"
@@ -90,6 +91,7 @@ struct peak_usb_adapter {
 extern const struct peak_usb_adapter pcan_usb;
 extern const struct peak_usb_adapter pcan_usb_pro;
 extern const struct peak_usb_adapter pcan_usb_fd;
+extern const struct peak_usb_adapter pcan_usb_chip;
 extern const struct peak_usb_adapter pcan_usb_pro_fd;
 extern const struct peak_usb_adapter pcan_usb_x6;
 
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 304732550f0a..528d3bb4917f 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -1061,6 +1061,78 @@ const struct peak_usb_adapter pcan_usb_fd = {
 	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
 };
 
+/* describes the PCAN-CHIP USB */
+static const struct can_bittiming_const pcan_usb_chip_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TSLOW_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TSLOW_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TSLOW_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TSLOW_BRP_BITS),
+	.brp_inc = 1,
+};
+
+static const struct can_bittiming_const pcan_usb_chip_data_const = {
+	.name = "pcan_chip_usb",
+	.tseg1_min = 1,
+	.tseg1_max = (1 << PUCAN_TFAST_TSGEG1_BITS),
+	.tseg2_min = 1,
+	.tseg2_max = (1 << PUCAN_TFAST_TSGEG2_BITS),
+	.sjw_max = (1 << PUCAN_TFAST_SJW_BITS),
+	.brp_min = 1,
+	.brp_max = (1 << PUCAN_TFAST_BRP_BITS),
+	.brp_inc = 1,
+};
+
+const struct peak_usb_adapter pcan_usb_chip = {
+	.name = "PCAN-Chip USB",
+	.device_id = PCAN_USBCHIP_PRODUCT_ID,
+	.ctrl_count = PCAN_USBFD_CHANNEL_COUNT,
+	.ctrlmode_supported = CAN_CTRLMODE_FD |
+		CAN_CTRLMODE_3_SAMPLES | CAN_CTRLMODE_LISTENONLY,
+	.clock = {
+		.freq = PCAN_UFD_CRYSTAL_HZ,
+	},
+	.bittiming_const = &pcan_usb_chip_const,
+	.data_bittiming_const = &pcan_usb_chip_data_const,
+
+	/* size of device private data */
+	.sizeof_dev_private = sizeof(struct pcan_usb_fd_device),
+
+	/* timestamps usage */
+	.ts_used_bits = 32,
+	.ts_period = 1000000, /* calibration period in ts. */
+	.us_per_ts_scale = 1, /* us = (ts * scale) >> shift */
+	.us_per_ts_shift = 0,
+
+	/* give here messages in/out endpoints */
+	.ep_msg_in = PCAN_USBPRO_EP_MSGIN,
+	.ep_msg_out = {PCAN_USBPRO_EP_MSGOUT_0},
+
+	/* size of rx/tx usb buffers */
+	.rx_buffer_size = PCAN_UFD_RX_BUFFER_SIZE,
+	.tx_buffer_size = PCAN_UFD_TX_BUFFER_SIZE,
+
+	/* device callbacks */
+	.intf_probe = pcan_usb_pro_probe,	/* same as PCAN-USB Pro */
+	.dev_init = pcan_usb_fd_init,
+
+	.dev_exit = pcan_usb_fd_exit,
+	.dev_free = pcan_usb_fd_free,
+	.dev_set_bus = pcan_usb_fd_set_bus,
+	.dev_set_bittiming = pcan_usb_fd_set_bittiming_slow,
+	.dev_set_data_bittiming = pcan_usb_fd_set_bittiming_fast,
+	.dev_decode_buf = pcan_usb_fd_decode_buf,
+	.dev_start = pcan_usb_fd_start,
+	.dev_stop = pcan_usb_fd_stop,
+	.dev_restart_async = pcan_usb_fd_restart_async,
+	.dev_encode_msg = pcan_usb_fd_encode_msg,
+
+	.do_get_berr_counter = pcan_usb_fd_get_berr_counter,
+};
+
 /* describes the PCAN-USB Pro FD adapter */
 static const struct can_bittiming_const pcan_usb_pro_fd_const = {
 	.name = "pcan_usb_pro_fd",
-- 
2.11.0


^ permalink raw reply related

* Re: [PATCH] stmmac: Add support for SIMATIC IOT2000 platform
From: Jan Kiszka @ 2017-04-25 12:15 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: Giuseppe Cavallaro, Alexandre Torgue, netdev,
	Linux Kernel Mailing List, Sascha Weisenberger
In-Reply-To: <CAHp75Vd=TkJXnDYqctKCzRGvMX_Zt0i5eH8GcBz44e-T93aJ0A@mail.gmail.com>

On 2017-04-25 13:42, Andy Shevchenko wrote:
> On Tue, Apr 25, 2017 at 1:09 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>> On 2017-04-25 12:07, Jan Kiszka wrote:
>>> On 2017-04-25 11:46, Andy Shevchenko wrote:
>>>> On Tue, Apr 25, 2017 at 12:00 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>>> On 2017-04-25 09:30, Andy Shevchenko wrote:
>>>>>> On Tue, Apr 25, 2017 at 8:44 AM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
>>>>>>> On 2017-04-24 23:27, Andy Shevchenko wrote:
>>>>>>>> On Mon, Apr 24, 2017 at 10:27 PM, Jan Kiszka <jan.kiszka@siemens.com> wrote:
> 
>>>>        {
>>>>                .name = "SIMATIC IOT2000",
>>>>                .func = 6,
>>>>                .phy_addr = 1,
>>>>        },
>>>>        {
>>>>                .name = "SIMATIC IOT2000",
>>>>                .func = 7,
>>>>                .phy_addr = 1,
>>>>        },
>>>>
>>>> That's all what you need.
>>>
>>> Nope. Again: the asset tag is the way to tell both apart AND to ensure
>>> that we do not match on future devices.
> 
>> To be more verbose: your version (which is our old one) would even
>> enable the second, not connected port on the IOT2020. Incorrectly.
> 
> So, name has 2000 for 2020 device? It's clear bug in DMI table you have. :-(
> 
> What else do you have in DMI which can be used to distinguish those devices?

Andy, there are devices out there in the field, whether we as engineers like
it or not, that are all called "IOT2000" although they are slightly
different inside. This patch accounts for that. And it does that even
without adding "platform_data" hacks like in other patches of mine. ;)

Jan

-- 
Siemens AG, Corporate Technology, CT RDA ITP SES-DE
Corporate Competence Center Embedded Linux

^ permalink raw reply

* Re: [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
From: Jiri Pirko @ 2017-04-25 12:13 UTC (permalink / raw)
  To: Jamal Hadi Salim; +Cc: davem, xiyou.wangcong, eric.dumazet, netdev
In-Reply-To: <1493121247-11863-3-git-send-email-jhs@emojatatu.com>

Tue, Apr 25, 2017 at 01:54:06PM CEST, jhs@mojatatu.com wrote:
>From: Jamal Hadi Salim <jhs@mojatatu.com>
>
>When you dump hundreds of thousands of actions, getting only 32 per
>dump batch even when the socket buffer and memory allocations allow
>is inefficient.
>
>With this change, the user will get as many as possibly fitting
>within the given constraints available to the kernel.
>
>The top level action TLV space is extended. An attribute
>TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
>is set by the user indicating the user is capable of processing
>these large dumps. Older user space which doesnt set this flag
>doesnt get the large (than 32) batches.
>The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
>actions are put in a single batch. As such user space app knows how long
>to iterate (independent of the type of action being dumped)
>instead of hardcoded maximum of 32.
>
>Some results dumping 1.5M actions, first unpatched tc which the
>kernel doesnt help:
>
>prompt$ time -p tc actions ls action gact | grep index | wc -l
>1500000
>real 1388.43
>user 2.07
>sys 1386.79
>
>Now lets see a patched tc which sets the correct flags when requesting
>a dump:
>
>prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
>1500000
>real 178.13
>user 2.02
>sys 176.96
>
>That is about 8x performance improvement for tc which sets its
>receive buffer to about 32K.
>
>Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
>---
> include/uapi/linux/rtnetlink.h | 22 ++++++++++++++--
> net/sched/act_api.c            | 59 +++++++++++++++++++++++++++++++++++-------
> 2 files changed, 69 insertions(+), 12 deletions(-)
>
>diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
>index cce0613..5875467 100644
>--- a/include/uapi/linux/rtnetlink.h
>+++ b/include/uapi/linux/rtnetlink.h
>@@ -674,10 +674,28 @@ struct tcamsg {
> 	unsigned char	tca__pad1;
> 	unsigned short	tca__pad2;
> };
>+
>+enum {
>+	TCA_ROOT_UNSPEC,
>+	TCA_ROOT_TAB,
>+#define TCA_ACT_TAB TCA_ROOT_TAB
>+	TCA_ROOT_FLAGS,
>+	TCA_ROOT_COUNT,
>+	__TCA_ROOT_MAX,
>+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
>+};
>+
> #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
> #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
>-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
>-#define TCAA_MAX 1
>+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
>+ *
>+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
>+ * actions in a dump. All dump responses will contain the number of actions
>+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
>+ *
>+ */
>+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)

BIT (I think I mentioned this before)

>+#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON

Again, namespace please. "TCA_ROOT_FLAGS_VALID"
Why is this UAPI?


> 
> /* New extended info filters for IFLA_EXT_MASK */
> #define RTEXT_FILTER_VF		(1 << 0)
>diff --git a/net/sched/act_api.c b/net/sched/act_api.c
>index 9ce22b7..2756213 100644
>--- a/net/sched/act_api.c
>+++ b/net/sched/act_api.c
>@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			   struct netlink_callback *cb)
> {
> 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
>+	u32 act_flags = cb->args[2];
> 	struct nlattr *nest;
> 
> 	spin_lock_bh(&hinfo->lock);
>@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
> 			}
> 			nla_nest_end(skb, nest);
> 			n_i++;
>-			if (n_i >= TCA_ACT_MAX_PRIO)
>+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
>+			    n_i >= TCA_ACT_MAX_PRIO)
> 				goto done;
> 		}
> 	}
> done:
> 	spin_unlock_bh(&hinfo->lock);
>-	if (n_i)
>+	if (n_i) {
> 		cb->args[0] += n_i;
>+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
>+			cb->args[1] = n_i;
>+	}
> 	return n_i;
> 
> nla_put_failure:
>@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
> 	return tcf_add_notify(net, n, &actions, portid);
> }
> 
>+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
>+	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
>+};
>+
> static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 			 struct netlink_ext_ack *extack)
> {
> 	struct net *net = sock_net(skb->sk);
>-	struct nlattr *tca[TCAA_MAX + 1];
>+	struct nlattr *tca[TCA_ROOT_MAX + 1];
> 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
> 	int ret = 0, ovr = 0;
> 
>@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	    !netlink_capable(skb, CAP_NET_ADMIN))
> 		return -EPERM;
> 
>-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
>+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
> 			  extack);
> 	if (ret < 0)
> 		return ret;
>@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
> 	return ret;
> }
> 
>-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
>+static struct nlattr *find_dump_kind(struct nlattr **nla)
> {
> 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
> 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
>-	struct nlattr *nla[TCAA_MAX + 1];
> 	struct nlattr *kind;
> 
>-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
>-			NULL, NULL) < 0)
>-		return NULL;
> 	tb1 = nla[TCA_ACT_TAB];
> 	if (tb1 == NULL)
> 		return NULL;
>@@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
> 	return kind;
> }
> 
>+static inline bool tca_flags_valid(u32 act_flags)
>+{
>+	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
>+
>+	if (act_flags & invalid_flags_mask)
>+		return false;

I don't see how this resolves anything. VALID_TCA_FLAGS is set in stone
not going to change anytime in future, right? Then the TCA_ROOT_FLAGS
attr will always contain only one flag, right?
Then, I don't see why do we need this dance... u8 flag attr resolves
this in elegant way. I know I sound like a broken record. This is the
last time I'm commenting on this. I give up.


>+
>+	return true;
>+}
>+
> static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> {
> 	struct net *net = sock_net(skb->sk);
>@@ -1082,8 +1097,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	struct tc_action_ops *a_o;
> 	int ret = 0;
> 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
>-	struct nlattr *kind = find_dump_kind(cb->nlh);
>+	struct nlattr *count_attr = NULL;
>+	struct nlattr *tb[TCA_ROOT_MAX + 1];
>+	struct nlattr *kind = NULL;
>+	u32 act_flags = 0;
>+	u32 act_count = 0;
>+
>+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
>+			  tcaa_policy, NULL);
>+	if (ret < 0)
>+		return ret;
> 
>+	kind = find_dump_kind(tb);
> 	if (kind == NULL) {
> 		pr_info("tc_dump_action: action bad kind\n");
> 		return 0;
>@@ -1093,14 +1118,25 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (a_o == NULL)
> 		return 0;
> 
>+	if (tb[TCA_ROOT_FLAGS])
>+		act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
>+
>+	if (act_flags && !tca_flags_valid(act_flags))
>+		return -EINVAL;
>+
> 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
> 			cb->nlh->nlmsg_type, sizeof(*t), 0);
> 	if (!nlh)
> 		goto out_module_put;
>+
>+	cb->args[2] = act_flags;
> 	t = nlmsg_data(nlh);
> 	t->tca_family = AF_UNSPEC;
> 	t->tca__pad1 = 0;
> 	t->tca__pad2 = 0;
>+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
>+	if (!count_attr)
>+		goto out_module_put;
> 
> 	nest = nla_nest_start(skb, TCA_ACT_TAB);
> 	if (nest == NULL)
>@@ -1113,6 +1149,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
> 	if (ret > 0) {
> 		nla_nest_end(skb, nest);
> 		ret = skb->len;
>+		act_count = cb->args[1];
>+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
>+		cb->args[1] = 0;
> 	} else
> 		nlmsg_trim(skb, b);
> 
>-- 
>1.9.1
>

^ permalink raw reply

* [PATCH net] net: batman-adv: Fix possible memleaks when fail to register_netdevice
From: gfree.wind @ 2017-04-25 12:03 UTC (permalink / raw)
  To: mareklindner, sw, a, davem, b.a.t.m.a.n, netdev; +Cc: Gao Feng

From: Gao Feng <fgao@ikuai8.com>

The function batadv_softif_init_late allocates some resources and is
invoked from register_netdevice. So when register_netdevice fails, we
need to call batadv_softif_free instead of free_netdev to clean them
up.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
---
 net/batman-adv/soft-interface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index d042c99..90bf990 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -1011,7 +1011,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
 	if (ret < 0) {
 		pr_err("Unable to register the batman interface '%s': %i\n",
 		       name, ret);
-		free_netdev(soft_iface);
+		batadv_softif_free(soft_iface);
 		return NULL;
 	}
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net] driver/net: Fix possible memleaks when fail to register_netdevice
From: gfree.wind @ 2017-04-25 12:01 UTC (permalink / raw)
  To: jiri, davem, kuznet, jmorris, yoshfuji, kaber, steffen.klassert,
	herbert, netdev
  Cc: Gao Feng

From: Gao Feng <fgao@ikuai8.com>

These drivers allocate various resources in their init routine and free
some of them in the net_device destructor. This may cause a memory leak
when an error happens after register_netdevice has invoked the init
callback, because the error handler of register_netdevice only invokes
the uninit callback, not the destructor. As a result, some resources
are lost forever.

Now invoke the destructor instead of free_netdev where applicable, and free
the remaining resources in the newlink function when register_netdevice fails.

Signed-off-by: Gao Feng <fgao@ikuai8.com>
---
 drivers/net/dummy.c      |  2 +-
 drivers/net/ifb.c        |  2 +-
 drivers/net/loopback.c   |  2 +-
 drivers/net/team/team.c  | 11 ++++++++++-
 drivers/net/veth.c       |  4 ++--
 net/8021q/vlan_netlink.c |  6 +++++-
 net/ipv4/ip_tunnel.c     |  9 ++++++++-
 net/ipv6/ip6_gre.c       |  6 +++++-
 net/ipv6/ip6_tunnel.c    | 12 ++++++++++--
 net/ipv6/ip6_vti.c       |  7 ++++++-
 net/ipv6/sit.c           |  5 ++++-
 11 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index 2c80611..55b8a50 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -383,7 +383,7 @@ static int __init dummy_init_one(void)
 	return 0;
 
 err:
-	free_netdev(dev_dummy);
+	dummy_free_netdev(dev_dummy);
 	return err;
 }
 
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 312fce7..a298371 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -318,7 +318,7 @@ static int __init ifb_init_one(int index)
 	return 0;
 
 err:
-	free_netdev(dev_ifb);
+	ifb_dev_free(dev_ifb);
 	return err;
 }
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index b23b719..c4e1d4c 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -208,7 +208,7 @@ static __net_init int loopback_net_init(struct net *net)
 
 
 out_free_netdev:
-	free_netdev(dev);
+	loopback_dev_free(dev);
 out:
 	if (net_eq(net, &init_net))
 		panic("loopback: Failed to register netdevice: %d\n", err);
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index f8c81f1..0bc80fb 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -2109,10 +2109,19 @@ static void team_setup(struct net_device *dev)
 static int team_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
+	int ret;
+
 	if (tb[IFLA_ADDRESS] == NULL)
 		eth_hw_addr_random(dev);
 
-	return register_netdevice(dev);
+	ret = register_netdevice(dev);
+	if (ret) {
+		struct team *team = netdev_priv(dev);
+
+		free_percpu(team->pcpu_stats);
+	}
+
+	return ret;
 }
 
 static int team_validate(struct nlattr *tb[], struct nlattr *data[])
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index 8c39d6d..f60f5ee 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -457,13 +457,13 @@ static int veth_newlink(struct net *src_net, struct net_device *dev,
 	return 0;
 
 err_register_dev:
-	/* nothing to do */
+	free_percpu(dev->vstats);
 err_configure_peer:
 	unregister_netdevice(peer);
 	return err;
 
 err_register_peer:
-	free_netdev(peer);
+	veth_dev_free(peer);
 	return err;
 }
 
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 1270207..a15826a 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -156,7 +156,11 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
-	return register_vlan_dev(dev);
+	err = register_vlan_dev(dev);
+	if (err)
+		free_percpu(vlan->vlan_pcpu_stats);
+
+	return err;
 }
 
 static inline size_t vlan_qos_map_size(unsigned int n)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 823abae..4acb296 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -63,6 +63,8 @@
 #include <net/ip6_route.h>
 #endif
 
+static void ip_tunnel_dev_free(struct net_device *dev);
+
 static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
 {
 	return hash_32((__force u32)key ^ (__force u32)remote,
@@ -285,7 +287,7 @@ static struct net_device *__ip_tunnel_create(struct net *net,
 	return dev;
 
 failed_free:
-	free_netdev(dev);
+	ip_tunnel_dev_free(dev);
 failed:
 	return ERR_PTR(err);
 }
@@ -1099,7 +1101,12 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
 		dev->mtu = mtu;
 
 	ip_tunnel_add(itn, nt);
+
+	return 0;
 out:
+	gro_cells_destroy(&nt->gro_cells);
+	dst_cache_destroy(&nt->dst_cache);
+	free_percpu(dev->tstats);
 	return err;
 }
 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 6fcb7cb..d409ad1 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -77,6 +77,7 @@ struct ip6gre_net {
 static void ip6gre_tunnel_setup(struct net_device *dev);
 static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t);
 static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu);
+static void ip6gre_dev_free(struct net_device *dev);
 
 /* Tunnel hash table */
 
@@ -351,7 +352,7 @@ static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net,
 	return nt;
 
 failed_free:
-	free_netdev(dev);
+	ip6gre_dev_free(dev);
 	return NULL;
 }
 
@@ -1388,7 +1389,10 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev,
 	dev_hold(dev);
 	ip6gre_tunnel_link(ign, nt);
 
+	return 0;
 out:
+	dst_cache_destroy(&nt->dst_cache);
+	free_percpu(dev->tstats);
 	return err;
 }
 
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 75fac93..95f512c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1960,11 +1960,12 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
 	struct ip6_tnl *nt, *t;
 	struct ip_tunnel_encap ipencap;
+	int err;
 
 	nt = netdev_priv(dev);
 
 	if (ip6_tnl_netlink_encap_parms(data, &ipencap)) {
-		int err = ip6_tnl_encap_setup(nt, &ipencap);
+		err = ip6_tnl_encap_setup(nt, &ipencap);
 
 		if (err < 0)
 			return err;
@@ -1981,7 +1982,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev,
 			return -EEXIST;
 	}
 
-	return ip6_tnl_create2(dev);
+	err = ip6_tnl_create2(dev);
+	if (err) {
+		gro_cells_destroy(&t->gro_cells);
+		dst_cache_destroy(&t->dst_cache);
+		free_percpu(dev->tstats);
+	}
+
+	return err;
 }
 
 static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 3d8a3b6..b201eef 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -940,6 +940,7 @@ static int vti6_newlink(struct net *src_net, struct net_device *dev,
 {
 	struct net *net = dev_net(dev);
 	struct ip6_tnl *nt;
+	int ret;
 
 	nt = netdev_priv(dev);
 	vti6_netlink_parms(data, &nt->parms);
@@ -949,7 +950,11 @@ static int vti6_newlink(struct net *src_net, struct net_device *dev,
 	if (vti6_locate(net, &nt->parms, 0))
 		return -EEXIST;
 
-	return vti6_tnl_create2(dev);
+	ret = vti6_tnl_create2(dev);
+	if (ret)
+		free_percpu(dev->tstats);
+
+	return ret;
 }
 
 static void vti6_dellink(struct net_device *dev, struct list_head *head)
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 99853c6..f45dc4a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1555,8 +1555,11 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev,
 		return -EEXIST;
 
 	err = ipip6_tunnel_create(dev);
-	if (err < 0)
+	if (err < 0) {
+		dst_cache_destroy(&nt->dst_cache);
+		free_percpu(dev->tstats);
 		return err;
+	}
 
 #ifdef CONFIG_IPV6_SIT_6RD
 	if (ipip6_netlink_6rd_parms(data, &ip6rd))
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH net-next 0/2] flower: add MPLS matching support
From: Simon Horman @ 2017-04-25 11:55 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Jamal Hadi Salim, David Miller, benjamin.lahaise, netdev, bcrl,
	Jiri Pirko
In-Reply-To: <20170424190743.4ad24ad1@cakuba.netronome.com>

On Mon, Apr 24, 2017 at 07:07:43PM -0700, Jakub Kicinski wrote:
> On Mon, 24 Apr 2017 22:06:08 -0400, Jamal Hadi Salim wrote:
> > On 17-04-24 10:00 PM, Jamal Hadi Salim wrote:
> > > On 17-04-24 09:48 PM, Jamal Hadi Salim wrote:  
> > 
> > >
> > > Hrm. maybe I am wrong.
> > > Lets say user sets all of the 8 bits in BOS,
> > > what does setting
> > > key_val->mpls_bos = nla_get_u8 do?
> > >
> > > Same with the 20 bits for the label in the u32
> > > or 3 bit bits in the u8 tc.  
> > 
> > The label and tc are masked - maybe just the BOS
> > needs something similar?
> 
> Indeed, good catch!

I agree something should be done wrt BOS. If the LABEL and TC are to
be left as-is then I think a similar treatment of BOS - that is masking it
- makes sense.

I also agree with statements made earlier in the thread that it is unlikely
that the unused bits of these attributes will be used - as opposed to a
bitmask of flag values which seems ripe for re-use for future flags.

I would like to add to the discussion that I think in future it would
be good to expand the features provided by this patch to support supplying
a mask as part of the match - as flower supports for other fields such
as IP addresses. But I think the current scheme of masking out invalid bits
should also work in conjunction with user-supplied masks.

^ permalink raw reply

* [PATCH net-next v8 3/3] net sched actions: add time filter for action dumping
From: Jamal Hadi Salim @ 2017-04-25 11:54 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim
In-Reply-To: <1493121247-11863-1-git-send-email-jhs@emojatatu.com>

From: Jamal Hadi Salim <jhs@mojatatu.com>

This adds support for filtering based on time since last used.
When we are dumping a large number of actions it is useful to
have the option of filtering based on when the action was last
used to reduce the amount of data crossing to user space.

With this patch the user space app sets the TCA_ROOT_TIME_DELTA
attribute with the value in milliseconds with "time of interest
since now".  The kernel converts this to jiffies and does the
filtering comparison matching entries that have seen activity
since then and returns them to user space.
Old kernels and old tc continue to work in legacy mode since
they dont specify this attribute.

Some example (we have 400 actions bound to 400 filters); at
installation time using  hacked tc which sets the time of
interest to 120 seconds:
prompt$ hackedtc actions ls action gact | grep index | wc -l
400

go get some coffee and  wait for > 120 seconds and try again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
0

Lets see a filter bound to one of these actions:
..
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
  match 7f000002/ffffffff at 12 (success 1 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
    Action statistics:
    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0
....

that coffee took long, no? It was good.

Now lets ping -c 1 127.0.0.2, then run the actions again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
1

More details please:

prompt$ hackedtc -s actions ls action gact

    action order 0: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

And the filter?

filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
  match 7f000002/ffffffff at 12 (success 2 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/sched/act_api.c            | 22 ++++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5875467..39c7499 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -681,6 +681,7 @@ enum {
 #define TCA_ACT_TAB TCA_ROOT_TAB
 	TCA_ROOT_FLAGS,
 	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
 	__TCA_ROOT_MAX,
 #define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2756213..c0aee2c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -84,6 +84,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	u32 act_flags = cb->args[2];
+	unsigned long jiffy_since = cb->args[3];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			if (index < s_i)
 				continue;
 
+			if (jiffy_since &&
+			    time_after(jiffy_since,
+				       (unsigned long)p->tcfa_tm.lastuse))
+				continue;
+
 			nest = nla_nest_start(skb, n_i);
 			if (nest == NULL)
 				goto nla_put_failure;
@@ -118,9 +124,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 		}
 	}
 done:
+	if (index > 0)
+		cb->args[0] = index + 1;
+
 	spin_unlock_bh(&hinfo->lock);
 	if (n_i) {
-		cb->args[0] += n_i;
 		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
 	}
@@ -999,7 +1007,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 }
 
 static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
-	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
+	[TCA_ROOT_FLAGS]           = { .type = NLA_U32 },
+	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
 };
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1099,7 +1108,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
 	struct nlattr *count_attr = NULL;
 	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	unsigned long jiffy_since = 0;
 	struct nlattr *kind = NULL;
+	u32 msecs_since = 0;
 	u32 act_flags = 0;
 	u32 act_count = 0;
 
@@ -1124,12 +1135,19 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (act_flags && !tca_flags_valid(act_flags))
 		return -EINVAL;
 
+	if (tb[TCA_ROOT_TIME_DELTA])
+		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
 
+	if (msecs_since)
+		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
 	cb->args[2] = act_flags;
+	cb->args[3] = jiffy_since;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v8 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
From: Jamal Hadi Salim @ 2017-04-25 11:54 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim
In-Reply-To: <1493121247-11863-1-git-send-email-jhs@emojatatu.com>

From: Jamal Hadi Salim <jhs@mojatatu.com>

When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
more is inefficient.

With this change, the user will get as many actions as can fit
within the given constraints available to the kernel.

The top level action TLV space is extended. An attribute
TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
is set by the user to indicate that the user is capable of processing
these large dumps. Older user space which doesn't set this flag
doesn't get batches larger than 32.
The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
actions are put in a single batch. As such, the user space app knows how
long to iterate (independent of the type of action being dumped)
instead of relying on a hardcoded maximum of 32.

Some results dumping 1.5M actions, first with an unpatched tc, which
the kernel doesn't help:

prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79

Now let's see a patched tc which sets the correct flags when requesting
a dump:

prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96

That is about an 8x performance improvement for a tc which sets its
receive buffer to about 32K.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h | 22 ++++++++++++++--
 net/sched/act_api.c            | 59 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cce0613..5875467 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -674,10 +674,28 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
+#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9ce22b7..2756213 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
+	if (n_i) {
 		cb->args[0] += n_i;
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCAA_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
 	return kind;
 }
 
+static inline bool tca_flags_valid(u32 act_flags)
+{
+	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
+
+	if (act_flags & invalid_flags_mask)
+		return false;
+
+	return true;
+}
+
 static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1082,8 +1097,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *count_attr = NULL;
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *kind = NULL;
+	u32 act_flags = 0;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1093,14 +1118,25 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	if (tb[TCA_ROOT_FLAGS])
+		act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
+
+	if (act_flags && !tca_flags_valid(act_flags))
+		return -EINVAL;
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	cb->args[2] = act_flags;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1113,6 +1149,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v8 1/3] net sched actions: Use proper root attribute table for actions
From: Jamal Hadi Salim @ 2017-04-25 11:54 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim
In-Reply-To: <1493121247-11863-1-git-send-email-jhs@emojatatu.com>

From: Jamal Hadi Salim <jhs@mojatatu.com>

Bug fix for an issue which has been around for about a decade.
We got away with it because the enumeration was larger than needed.

Fixes: 7ba699c604ab ("[NET_SCHED]: Convert actions from rtnetlink to new netlink API")
Suggested-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 net/sched/act_api.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 82b1d48..9ce22b7 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -997,7 +997,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCA_ACT_MAX + 1];
+	struct nlattr *tca[TCAA_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1005,7 +1005,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v8 0/3] net sched actions: improve dump performance
From: Jamal Hadi Salim @ 2017-04-25 11:54 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim

From: Jamal Hadi Salim <jhs@mojatatu.com>

Changes since v7:
-----------------

Jamal:
Patch 1 went out twice. Resend without two copies of patch 1

changes since v6:
-----------------

1) DaveM:
New rules for netlink messages. From now on we are going to start
checking for bits that are not used and rejecting anything we don't
understand. In the future this is going to require major changes
to user space code (tc etc). This is just a start.

To quote, David:
"
 Again, bits you aren't using now, make sure userspace doesn't
   set them.  And if it does, reject.
"

2) Jiri:
a)Fix the commit message to properly use "Fixes" description
b)Align assignments for nla_policy

Changes since v5:
----------------

0)
Remove use of BIT() because it is kernel specific. Requires a separate
patch (Jiri can submit that in his cleanups)

1)To paraphrase Eric D.

"memcpy(nla_data(count_attr), &cb->args[1], sizeof(u32));
won't work on 64-bit BE machines because cb->args[1]
(which is 64 bits) is larger in size than sizeof(u32)"

Fixed

2) Jiri Pirko

i) Spotted a bug fix mixed in the patch for wrong TLV
fix. Add patch 1/3 to address this. Make part of this
series because of dependencies.

ii) Rename ACT_LARGE_DUMP_ON -> TCA_FLAG_LARGE_DUMP_ON

iii) Satisfy Jiri's obsession against the noun "tcaa"
a)Rename struct nlattr *tcaa --> struct nlattr *tb
b)Rename TCAA_ACT_XXX -> TCA_ROOT_XXX

Changes since v4:
-----------------

1) Eric D.

pointed out that when all skb space is used up by the dump
there will be no space to insert the TCAA_ACT_COUNT attribute.

2) Jiri:

i) Change:

enum {
        TCAA_UNSPEC,
        TCAA_ACT_TAB,
        TCAA_ACT_FLAGS,
        TCAA_ACT_COUNT,
        TCAA_ACT_TIME_FILTER,
        __TCAA_MAX
};

#define TCAA_MAX (__TCAA_MAX - 1)
#define ACT_LARGE_DUMP_ON               (1 << 0)

to:
enum {
       TCAA_UNSPEC,
       TCAA_ACT_TAB,
#define TCA_ACT_TAB TCAA_ACT_TAB
       TCAA_ACT_FLAGS,
       TCAA_ACT_COUNT,
       __TCAA_MAX,
#define        TCAA_MAX (__TCAA_MAX - 1)
};

#define ACT_LARGE_DUMP_ON              BIT(0)

Jiri plans to followup with the rest of the code to make the
style consistent.

ii) Rename attribute TCAA_ACT_TIME_FILTER --> TCAA_ACT_TIME_DELTA

iii) Rename variable jiffy_filter --> jiffy_since
iv) Rename msecs_filter --> msecs_since
v) get rid of unused cb->args[0] and rename cb->args[4] to cb->args[0]

Jamal Hadi Salim (3):
  net sched actions: Use proper root attribute table for actions
  net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  net sched actions: add time filter for action dumping

 include/uapi/linux/rtnetlink.h | 22 ++++++++++++--
 net/sched/act_api.c            | 66 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 75 insertions(+), 13 deletions(-)

-- 
1.9.1


Jamal Hadi Salim (3):
  net sched actions: Use proper root attribute table for actions
  net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
  net sched actions: add time filter for action dumping

 include/uapi/linux/rtnetlink.h | 23 ++++++++++--
 net/sched/act_api.c            | 79 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 89 insertions(+), 13 deletions(-)

-- 
1.9.1

^ permalink raw reply

* Re: [PATCH net-next v7 0/3] net sched actions: improve dump performance
From: Jamal Hadi Salim @ 2017-04-25 11:52 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev
In-Reply-To: <1493120988-11765-1-git-send-email-jhs@emojatatu.com>


Grr. I gitta git my git-foo right.
Patch 1 repeated twice - I'll send v8.

cheers,
jamal

^ permalink raw reply

* [PATCH net-next v7 2/3] net sched actions: dump more than TCA_ACT_MAX_PRIO actions per batch
From: Jamal Hadi Salim @ 2017-04-25 11:49 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim
In-Reply-To: <1493120988-11765-1-git-send-email-jhs@emojatatu.com>

From: Jamal Hadi Salim <jhs@mojatatu.com>

When you dump hundreds of thousands of actions, getting only 32 per
dump batch even when the socket buffer and memory allocations allow
more is inefficient.

With this change, the user will get as many actions as can fit
within the given constraints available to the kernel.

The top level action TLV space is extended. An attribute
TCA_ROOT_FLAGS is used to carry flags; flag TCA_FLAG_LARGE_DUMP_ON
is set by the user to indicate that the user is capable of processing
these large dumps. Older user space which doesn't set this flag
doesn't get batches larger than 32.
The kernel uses the TCA_ROOT_COUNT attribute to tell the user how many
actions are put in a single batch. As such, the user space app knows how
long to iterate (independent of the type of action being dumped)
instead of relying on a hardcoded maximum of 32.

Some results dumping 1.5M actions, first with an unpatched tc, which
the kernel doesn't help:

prompt$ time -p tc actions ls action gact | grep index | wc -l
1500000
real 1388.43
user 2.07
sys 1386.79

Now let's see a patched tc which sets the correct flags when requesting
a dump:

prompt$ time -p updatedtc actions ls action gact | grep index | wc -l
1500000
real 178.13
user 2.02
sys 176.96

That is about an 8x performance improvement for a tc which sets its
receive buffer to about 32K.

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h | 22 ++++++++++++++--
 net/sched/act_api.c            | 59 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index cce0613..5875467 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -674,10 +674,28 @@ struct tcamsg {
 	unsigned char	tca__pad1;
 	unsigned short	tca__pad2;
 };
+
+enum {
+	TCA_ROOT_UNSPEC,
+	TCA_ROOT_TAB,
+#define TCA_ACT_TAB TCA_ROOT_TAB
+	TCA_ROOT_FLAGS,
+	TCA_ROOT_COUNT,
+	__TCA_ROOT_MAX,
+#define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
+};
+
 #define TA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcamsg))))
 #define TA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcamsg))
-#define TCA_ACT_TAB 1 /* attr type must be >=1 */	
-#define TCAA_MAX 1
+/* tcamsg flags stored in attribute TCA_ROOT_FLAGS
+ *
+ * TCA_FLAG_LARGE_DUMP_ON user->kernel to request for larger than TCA_ACT_MAX_PRIO
+ * actions in a dump. All dump responses will contain the number of actions
+ * being dumped stored in for user app's consumption in TCA_ROOT_COUNT
+ *
+ */
+#define TCA_FLAG_LARGE_DUMP_ON		(1 << 0)
+#define VALID_TCA_FLAGS TCA_FLAG_LARGE_DUMP_ON
 
 /* New extended info filters for IFLA_EXT_MASK */
 #define RTEXT_FILTER_VF		(1 << 0)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9ce22b7..2756213 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -83,6 +83,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			   struct netlink_callback *cb)
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
+	u32 act_flags = cb->args[2];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -111,14 +112,18 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			}
 			nla_nest_end(skb, nest);
 			n_i++;
-			if (n_i >= TCA_ACT_MAX_PRIO)
+			if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) &&
+			    n_i >= TCA_ACT_MAX_PRIO)
 				goto done;
 		}
 	}
 done:
 	spin_unlock_bh(&hinfo->lock);
-	if (n_i)
+	if (n_i) {
 		cb->args[0] += n_i;
+		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
+			cb->args[1] = n_i;
+	}
 	return n_i;
 
 nla_put_failure:
@@ -993,11 +998,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return tcf_add_notify(net, n, &actions, portid);
 }
 
+static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
+	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
+};
+
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 			 struct netlink_ext_ack *extack)
 {
 	struct net *net = sock_net(skb->sk);
-	struct nlattr *tca[TCAA_MAX + 1];
+	struct nlattr *tca[TCA_ROOT_MAX + 1];
 	u32 portid = skb ? NETLINK_CB(skb).portid : 0;
 	int ret = 0, ovr = 0;
 
@@ -1005,7 +1014,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	    !netlink_capable(skb, CAP_NET_ADMIN))
 		return -EPERM;
 
-	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCAA_MAX, NULL,
+	ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL,
 			  extack);
 	if (ret < 0)
 		return ret;
@@ -1046,16 +1055,12 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
 	return ret;
 }
 
-static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
+static struct nlattr *find_dump_kind(struct nlattr **nla)
 {
 	struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1];
 	struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
-	struct nlattr *nla[TCAA_MAX + 1];
 	struct nlattr *kind;
 
-	if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX,
-			NULL, NULL) < 0)
-		return NULL;
 	tb1 = nla[TCA_ACT_TAB];
 	if (tb1 == NULL)
 		return NULL;
@@ -1073,6 +1078,16 @@ static struct nlattr *find_dump_kind(const struct nlmsghdr *n)
 	return kind;
 }
 
+static inline bool tca_flags_valid(u32 act_flags)
+{
+	u32 invalid_flags_mask  = ~VALID_TCA_FLAGS;
+
+	if (act_flags & invalid_flags_mask)
+		return false;
+
+	return true;
+}
+
 static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
@@ -1082,8 +1097,18 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tc_action_ops *a_o;
 	int ret = 0;
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
-	struct nlattr *kind = find_dump_kind(cb->nlh);
+	struct nlattr *count_attr = NULL;
+	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	struct nlattr *kind = NULL;
+	u32 act_flags = 0;
+	u32 act_count = 0;
+
+	ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX,
+			  tcaa_policy, NULL);
+	if (ret < 0)
+		return ret;
 
+	kind = find_dump_kind(tb);
 	if (kind == NULL) {
 		pr_info("tc_dump_action: action bad kind\n");
 		return 0;
@@ -1093,14 +1118,25 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (a_o == NULL)
 		return 0;
 
+	if (tb[TCA_ROOT_FLAGS])
+		act_flags = nla_get_u32(tb[TCA_ROOT_FLAGS]);
+
+	if (act_flags && !tca_flags_valid(act_flags))
+		return -EINVAL;
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
+
+	cb->args[2] = act_flags;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
 	t->tca__pad2 = 0;
+	count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32));
+	if (!count_attr)
+		goto out_module_put;
 
 	nest = nla_nest_start(skb, TCA_ACT_TAB);
 	if (nest == NULL)
@@ -1113,6 +1149,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (ret > 0) {
 		nla_nest_end(skb, nest);
 		ret = skb->len;
+		act_count = cb->args[1];
+		memcpy(nla_data(count_attr), &act_count, sizeof(u32));
+		cb->args[1] = 0;
 	} else
 		nlmsg_trim(skb, b);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v7 3/3] net sched actions: add time filter for action dumping
From: Jamal Hadi Salim @ 2017-04-25 11:49 UTC (permalink / raw)
  To: davem; +Cc: jiri, xiyou.wangcong, eric.dumazet, netdev, Jamal Hadi Salim
In-Reply-To: <1493120988-11765-1-git-send-email-jhs@emojatatu.com>

From: Jamal Hadi Salim <jhs@mojatatu.com>

This adds support for filtering based on time since last used.
When we are dumping a large number of actions it is useful to
have the option of filtering based on when the action was last
used to reduce the amount of data crossing to user space.

With this patch the user space app sets the TCA_ROOT_TIME_DELTA
attribute with the value in milliseconds with "time of interest
since now".  The kernel converts this to jiffies and does the
filtering comparison matching entries that have seen activity
since then and returns them to user space.
Old kernels and old tc continue to work in legacy mode since
they don't specify this attribute.

An example (we have 400 actions bound to 400 filters): at
installation time, using a hacked tc which sets the time of
interest to 120 seconds:
prompt$ hackedtc actions ls action gact | grep index | wc -l
400

Go get some coffee, wait for > 120 seconds, and try again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
0

Let's see a filter bound to one of these actions:
..
filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 2 success 1)
  match 7f000002/ffffffff at 12 (success 1 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1145 sec used 802 sec
    Action statistics:
    Sent 84 bytes 1 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0
....

that coffee took long, no? It was good.

Now let's run ping -c 1 127.0.0.2, then dump the actions again:

prompt$ hackedtc actions ls action gact | grep index | wc -l
1

More details please:

prompt$ hackedtc -s actions ls action gact

    action order 0: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1270 sec used 30 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

And the filter?

filter pref 10 u32
filter pref 10 u32 fh 800: ht divisor 1
filter pref 10 u32 fh 800::800 order 2048 key ht 800 bkt 0 flowid 1:10  (rule hit 4 success 2)
  match 7f000002/ffffffff at 12 (success 2 )
    action order 1: gact action pass
     random type none pass val 0
     index 23 ref 2 bind 1 installed 1324 sec used 84 sec
    Action statistics:
    Sent 168 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
    backlog 0b 0p requeues 0

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/sched/act_api.c            | 22 ++++++++++++++++++++--
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5875467..39c7499 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -681,6 +681,7 @@ enum {
 #define TCA_ACT_TAB TCA_ROOT_TAB
 	TCA_ROOT_FLAGS,
 	TCA_ROOT_COUNT,
+	TCA_ROOT_TIME_DELTA, /* in msecs */
 	__TCA_ROOT_MAX,
 #define	TCA_ROOT_MAX (__TCA_ROOT_MAX - 1)
 };
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 2756213..c0aee2c 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -84,6 +84,7 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 {
 	int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
 	u32 act_flags = cb->args[2];
+	unsigned long jiffy_since = cb->args[3];
 	struct nlattr *nest;
 
 	spin_lock_bh(&hinfo->lock);
@@ -101,6 +102,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 			if (index < s_i)
 				continue;
 
+			if (jiffy_since &&
+			    time_after(jiffy_since,
+				       (unsigned long)p->tcfa_tm.lastuse))
+				continue;
+
 			nest = nla_nest_start(skb, n_i);
 			if (nest == NULL)
 				goto nla_put_failure;
@@ -118,9 +124,11 @@ static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb,
 		}
 	}
 done:
+	if (index > 0)
+		cb->args[0] = index + 1;
+
 	spin_unlock_bh(&hinfo->lock);
 	if (n_i) {
-		cb->args[0] += n_i;
 		if (act_flags & TCA_FLAG_LARGE_DUMP_ON)
 			cb->args[1] = n_i;
 	}
@@ -999,7 +1007,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 }
 
 static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
-	[TCA_ROOT_FLAGS]      = { .type = NLA_U32 },
+	[TCA_ROOT_FLAGS]           = { .type = NLA_U32 },
+	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
 };
 
 static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n,
@@ -1099,7 +1108,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh);
 	struct nlattr *count_attr = NULL;
 	struct nlattr *tb[TCA_ROOT_MAX + 1];
+	unsigned long jiffy_since = 0;
 	struct nlattr *kind = NULL;
+	u32 msecs_since = 0;
 	u32 act_flags = 0;
 	u32 act_count = 0;
 
@@ -1124,12 +1135,19 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb)
 	if (act_flags && !tca_flags_valid(act_flags))
 		return -EINVAL;
 
+	if (tb[TCA_ROOT_TIME_DELTA])
+		msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]);
+
 	nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
 			cb->nlh->nlmsg_type, sizeof(*t), 0);
 	if (!nlh)
 		goto out_module_put;
 
+	if (msecs_since)
+		jiffy_since = jiffies - msecs_to_jiffies(msecs_since);
+
 	cb->args[2] = act_flags;
+	cb->args[3] = jiffy_since;
 	t = nlmsg_data(nlh);
 	t->tca_family = AF_UNSPEC;
 	t->tca__pad1 = 0;
-- 
1.9.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox