* [PATCH net-next v3 1/2] rtnetlink: add new RTM_GETSTATS message to query stats
From: Roopa Prabhu @ 2016-04-16 3:28 UTC (permalink / raw)
To: netdev; +Cc: jhs, davem, tgraf
From: Roopa Prabhu <roopa@cumulusnetworks.com>
This patch adds a new RTM_GETSTATS message to query stats via netlink
from the kernel. RTM_NEWLINK also dumps link stats today, but RTM_NEWLINK
returns a lot more than just stats and is expensive in some cases when
frequent polling for stats from userspace is a common operation.
RTM_GETSTATS is an attempt to provide a light weight netlink message
to explicitly query only stats from the kernel. The idea is to also
keep it extensible so that new kinds of stats can be added to it in
the future.
This patch adds the following attribute for NETDEV stats:
struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
[IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
};
This patch also allows for af family stats (an example af stats for IPV6
is available with the second patch in the series).
Like any other rtnetlink message, RTM_GETSTATS can be used to get stats of
a single interface or all interfaces with NLM_F_DUMP.
Future possible new types of stat attributes:
- IFLA_STATS_LINK_MPLS (nested. for mpls/mdev stats)
- IFLA_STATS_LINK_EXTENDED (nested. extended software netdev stats like bridge,
vlan, vxlan etc)
- IFLA_STATS_LINK_HW_EXTENDED (nested. extended hardware stats which are
available via ethtool today)
This patch also declares a filter mask for all stat attributes.
User has to provide a mask of stats attributes to query. filter mask
can be specified in the new hdr 'struct if_stats_msg' for stats messages.
Other important field in the header is the ifindex.
This api can be used for global stats (eg tcp) in the future. When global
stats are included in a stats msg, the ifindex in the header
must be zero. A single stats message cannot contain both global and
netdev specific stats. To easily distinguish them, netdev specific stat
attribute names are prefixed with IFLA_STATS_LINK_
Without any attributes in the filter_mask, no stats will be returned.
This patch has been tested with modified iproute2 ifstat.
Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
include/net/rtnetlink.h | 5 ++
include/uapi/linux/if_link.h | 23 +++++
include/uapi/linux/rtnetlink.h | 5 ++
net/core/rtnetlink.c | 199 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 232 insertions(+)
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index 2f87c1b..fa68158 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -131,6 +131,11 @@ struct rtnl_af_ops {
const struct nlattr *attr);
int (*set_link_af)(struct net_device *dev,
const struct nlattr *attr);
+ size_t (*get_link_af_stats_size)(const struct net_device *dev,
+ u32 filter_mask);
+ int (*fill_link_af_stats)(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 filter_mask);
};
void __rtnl_af_unregister(struct rtnl_af_ops *ops);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 9427f17..ab740fe 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -780,4 +780,27 @@ enum {
#define IFLA_HSR_MAX (__IFLA_HSR_MAX - 1)
+/* STATS section */
+
+struct if_stats_msg {
+ __u8 family;
+ __u8 pad1;
+ __u16 pad2;
+ __u32 ifindex;
+ __u32 filter_mask;
+};
+
+/* A stats attribute can be netdev specific or a global stat.
+ * For netdev stats, lets use the prefix IFLA_STATS_LINK_*
+ */
+enum {
+ IFLA_STATS_UNSPEC,
+ IFLA_STATS_LINK_64,
+ __IFLA_STATS_MAX,
+};
+
+#define IFLA_STATS_MAX (__IFLA_STATS_MAX - 1)
+
+#define IFLA_STATS_FILTER_BIT(ATTR) (1 << (ATTR))
+
#endif /* _UAPI_LINUX_IF_LINK_H */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index ca764b5..cc885c4 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -139,6 +139,11 @@ enum {
RTM_GETNSID = 90,
#define RTM_GETNSID RTM_GETNSID
+ RTM_NEWSTATS = 92,
+#define RTM_NEWSTATS RTM_NEWSTATS
+ RTM_GETSTATS = 94,
+#define RTM_GETSTATS RTM_GETSTATS
+
__RTM_MAX,
#define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1)
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a7a3d34..2a8abe0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3444,6 +3444,202 @@ out:
return err;
}
+static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
+ int type, u32 pid, u32 seq, u32 change,
+ unsigned int flags, unsigned int filter_mask)
+{
+ struct if_stats_msg *ifsm;
+ struct nlmsghdr *nlh;
+ struct rtnl_af_ops *af_ops;
+ struct nlattr *attr;
+
+ ASSERT_RTNL();
+
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifsm), flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ ifsm = nlmsg_data(nlh);
+ ifsm->ifindex = dev->ifindex;
+ ifsm->filter_mask = filter_mask;
+
+ if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64)) {
+ struct rtnl_link_stats64 *sp;
+
+ attr = nla_reserve(skb, IFLA_STATS_LINK_64,
+ sizeof(struct rtnl_link_stats64));
+ if (!attr)
+ return -EMSGSIZE;
+
+ sp = nla_data(attr);
+ dev_get_stats(dev, sp);
+ }
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->fill_link_af_stats) {
+ int err;
+
+ err = af_ops->fill_link_af_stats(skb, dev, filter_mask);
+ if (err < 0)
+ goto nla_put_failure;
+ }
+ }
+
+ nlmsg_end(skb, nlh);
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb, nlh);
+
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
+ [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
+};
+
+static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
+ u32 filter_mask)
+{
+ struct rtnl_af_ops *af_ops;
+ size_t size = 0;
+
+ list_for_each_entry(af_ops, &rtnl_af_ops, list) {
+ if (af_ops->get_link_af_stats_size)
+ size += af_ops->get_link_af_stats_size(dev,
+ filter_mask);
+ }
+
+ return size;
+}
+
+static size_t if_nlmsg_stats_size(const struct net_device *dev,
+ u32 filter_mask)
+{
+ size_t size = 0;
+
+ if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_64))
+ size += nla_total_size(sizeof(struct rtnl_link_stats64));
+
+ size += rtnl_link_get_af_stats_size(dev, filter_mask);
+
+ return size;
+}
+
+static int rtnl_stats_get(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct if_stats_msg *ifsm;
+ struct net_device *dev = NULL;
+ struct sk_buff *nskb;
+ u32 filter_mask;
+ int err;
+
+ ifsm = nlmsg_data(nlh);
+ if (ifsm->ifindex > 0)
+ dev = __dev_get_by_index(net, ifsm->ifindex);
+ else
+ return -EINVAL;
+
+ if (!dev)
+ return -ENODEV;
+
+ filter_mask = ifsm->filter_mask;
+ if (!filter_mask)
+ return -EINVAL;
+
+ nskb = nlmsg_new(if_nlmsg_stats_size(dev, filter_mask), GFP_KERNEL);
+ if (!nskb)
+ return -ENOBUFS;
+
+ err = rtnl_fill_statsinfo(nskb, dev, RTM_NEWSTATS,
+ NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ 0, filter_mask);
+ if (err < 0) {
+ /* -EMSGSIZE implies BUG in if_nlmsg_stats_size */
+ WARN_ON(err == -EMSGSIZE);
+ kfree_skb(nskb);
+ } else {
+ err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid);
+ }
+
+ return err;
+}
+
+static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *dev;
+ u16 min_ifinfo_dump_size = 0;
+ struct if_stats_msg *ifsm;
+ u32 filter_mask;
+
+ ifsm = nlmsg_data(nlh);
+ filter_mask = ifsm->filter_mask;
+
+ /* traverse the list of net devices and compute the minimum
+ * buffer size based upon the filter mask.
+ */
+ list_for_each_entry(dev, &net->dev_base_head, dev_list) {
+ min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
+ if_nlmsg_stats_size(dev,
+ filter_mask));
+ }
+
+ return min_ifinfo_dump_size;
+}
+
+static int rtnl_stats_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct if_stats_msg *ifsm;
+ int h, s_h;
+ int idx = 0, s_idx;
+ struct net_device *dev;
+ struct hlist_head *head;
+ unsigned int flags = NLM_F_MULTI;
+ u32 filter_mask = 0;
+ int err;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ cb->seq = net->dev_base_seq;
+
+ ifsm = nlmsg_data(cb->nlh);
+ filter_mask = ifsm->filter_mask;
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry(dev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ err = rtnl_fill_statsinfo(skb, dev, RTM_NEWSTATS,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ flags, filter_mask);
+ /* If we ran out of room on the first message,
+ * we're in trouble
+ */
+ WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
+
+ if (err < 0)
+ goto out;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ idx++;
+ }
+ }
+out:
+ cb->args[1] = idx;
+ cb->args[0] = h;
+
+ return skb->len;
+}
+
/* Process one rtnetlink message. */
static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -3593,4 +3789,7 @@ void __init rtnetlink_init(void)
rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL);
rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL);
+
+ rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump,
+ rtnl_stats_calcit);
}
--
1.9.1
^ permalink raw reply related
* [PATCH net-next v3 RFC 2/2] ipv6: add support for stats via RTM_GETSTATS
From: Roopa Prabhu @ 2016-04-16 3:28 UTC (permalink / raw)
To: netdev; +Cc: jhs, davem, tgraf
From: Roopa Prabhu <roopa@cumulusnetworks.com>
This patch is an example of adding af stats in
RTM_GETSTATS. It adds a new nested IFLA_STATS_LINK_INET6
attribute for ipv6 af stats. stats attributes inside
IFLA_STATS_LINK_INET6 nested attribute use the existing ipv6
stats attributes from ipv6 IFLA_PROTINFO
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
This patch is an example of af stats hooked into the new stats
infrastructure. I have tested it to work. My real intent is to have
IFLA_STATS_LINK_MPLS implemented in the same way for mpls.
I am not sure how popular the current ipv6 stats are. so, we could
rethink ipv6 stats in a new way when people see the need
for it in the future.
include/uapi/linux/if_link.h | 1 +
net/core/rtnetlink.c | 1 +
net/ipv6/addrconf.c | 77 +++++++++++++++++++++++++++++++++++++++-----
3 files changed, 71 insertions(+), 8 deletions(-)
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index ab740fe..a419a6a2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -796,6 +796,7 @@ struct if_stats_msg {
enum {
IFLA_STATS_UNSPEC,
IFLA_STATS_LINK_64,
+ IFLA_STATS_LINK_INET6,
__IFLA_STATS_MAX,
};
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2a8abe0..687718a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3497,6 +3497,7 @@ nla_put_failure:
static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = {
[IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) },
+ [IFLA_STATS_LINK_INET6] = {. type = NLA_NESTED },
};
static size_t rtnl_link_get_af_stats_size(const struct net_device *dev,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a6c9927..fdca37c 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4917,6 +4917,29 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,
}
}
+static int inet6_fill_ifla6_stats(struct sk_buff *skb,
+ struct inet6_dev *idev)
+{
+ struct nlattr *nla;
+
+ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
+
+ nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS,
+ ICMP6_MIB_MAX * sizeof(u64));
+ if (!nla)
+ goto nla_put_failure;
+ snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS,
+ nla_len(nla));
+
+ return 0;
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
u32 ext_filter_mask)
{
@@ -4941,15 +4964,8 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
if (ext_filter_mask & RTEXT_FILTER_SKIP_STATS)
return 0;
- nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64));
- if (!nla)
- goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla));
-
- nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64));
- if (!nla)
+ if (inet6_fill_ifla6_stats(skb, idev))
goto nla_put_failure;
- snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla));
nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr));
if (!nla)
@@ -4991,6 +5007,49 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev,
return 0;
}
+static size_t inet6_get_link_af_stats_size(const struct net_device *dev,
+ u32 filter_mask)
+{
+ if (!(filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_INET6)))
+ return 0;
+
+ if (!__in6_dev_get(dev))
+ return 0;
+
+ return nla_total_size(sizeof(struct nlattr)) /* IFLA_STATS_LINK_INET6 */
+ + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ + nla_total_size(ICMP6_MIB_MAX * sizeof(u64));/* IFLA_INET6_ICMP6STATS */
+}
+
+static int inet6_fill_link_af_stats(struct sk_buff *skb,
+ const struct net_device *dev,
+ u32 filter_mask)
+{
+ struct inet6_dev *idev = __in6_dev_get(dev);
+ struct nlattr *inet6_stats;
+
+ if (!(filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_INET6)))
+ return 0;
+
+ if (!idev)
+ return -ENODATA;
+
+ inet6_stats = nla_nest_start(skb, IFLA_STATS_LINK_INET6);
+ if (!inet6_stats)
+ return -EMSGSIZE;
+
+ if (inet6_fill_ifla6_stats(skb, idev) < 0)
+ goto errout;
+
+ nla_nest_end(skb, inet6_stats);
+
+ return 0;
+errout:
+ nla_nest_cancel(skb, inet6_stats);
+
+ return -EMSGSIZE;
+}
+
static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
{
struct inet6_ifaddr *ifp;
@@ -6087,6 +6146,8 @@ static struct rtnl_af_ops inet6_ops __read_mostly = {
.get_link_af_size = inet6_get_link_af_size,
.validate_link_af = inet6_validate_link_af,
.set_link_af = inet6_set_link_af,
+ .get_link_af_stats_size = inet6_get_link_af_stats_size,
+ .fill_link_af_stats = inet6_fill_link_af_stats,
};
/*
--
1.9.1
^ permalink raw reply related
* [PATCH net-next] rtnetlink: rtnl_fill_stats: avoid an unnecssary stats copy
From: Roopa Prabhu @ 2016-04-16 3:36 UTC (permalink / raw)
To: davem; +Cc: netdev
From: Roopa Prabhu <roopa@cumulusnetworks.com>
This patch passes netlink attr data ptr directly to dev_get_stats
thus eliminating a stats copy.
Suggested-by: David Miller <davem@davemloft.net>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
---
net/core/rtnetlink.c | 23 ++++++++---------------
1 file changed, 8 insertions(+), 15 deletions(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a75f7e9..a7a3d34 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -808,11 +808,6 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
a->rx_nohandler = b->rx_nohandler;
}
-static void copy_rtnl_link_stats64(void *v, const struct rtnl_link_stats64 *b)
-{
- memcpy(v, b, sizeof(*b));
-}
-
/* All VF info */
static inline int rtnl_vfinfo_size(const struct net_device *dev,
u32 ext_filter_mask)
@@ -1054,25 +1049,23 @@ static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
static noinline_for_stack int rtnl_fill_stats(struct sk_buff *skb,
struct net_device *dev)
{
- const struct rtnl_link_stats64 *stats;
- struct rtnl_link_stats64 temp;
+ struct rtnl_link_stats64 *sp;
struct nlattr *attr;
- stats = dev_get_stats(dev, &temp);
-
- attr = nla_reserve(skb, IFLA_STATS,
- sizeof(struct rtnl_link_stats));
+ attr = nla_reserve(skb, IFLA_STATS64,
+ sizeof(struct rtnl_link_stats64));
if (!attr)
return -EMSGSIZE;
- copy_rtnl_link_stats(nla_data(attr), stats);
+ sp = nla_data(attr);
+ dev_get_stats(dev, sp);
- attr = nla_reserve(skb, IFLA_STATS64,
- sizeof(struct rtnl_link_stats64));
+ attr = nla_reserve(skb, IFLA_STATS,
+ sizeof(struct rtnl_link_stats));
if (!attr)
return -EMSGSIZE;
- copy_rtnl_link_stats64(nla_data(attr), stats);
+ copy_rtnl_link_stats(nla_data(attr), sp);
return 0;
}
--
1.9.1
^ permalink raw reply related
* Re: [PATCH 2/2] rtlwifi: Fix reusable codes in core.c
From: Kalle Valo @ 2016-04-16 4:31 UTC (permalink / raw)
To: Julian Calaby
Cc: Byeoungwook Kim, Larry Finger, Chaoming Li, linux-wireless,
netdev, linux-kernel@vger.kernel.org
In-Reply-To: <CAGRGNgX0uy=R1dd7UMO5aZu69QDUXrtZDCjdJnB1Yj3XzjemHA@mail.gmail.com>
Julian Calaby <julian.calaby@gmail.com> writes:
> Hi Kalle,
>
> On Sat, Apr 16, 2016 at 4:25 AM, Kalle Valo <kvalo@codeaurora.org> wrote:
>> Byeoungwook Kim <quddnr145@gmail.com> writes:
>>
>>> rtl_*_delay() functions were reused same codes about addr variable.
>>> So i have converted to rtl_addr_delay() from code about addr variable.
>>>
>>> Signed-off-by: Byeoungwook Kim <quddnr145@gmail.com>
>>> Reviewed-by: Julian Calaby <julian.calaby@gmail.com>
>>
>> Doesn't apply:
>>
>> Applying: rtlwifi: Fix reusable codes in core.c
>> fatal: sha1 information is lacking or useless (drivers/net/wireless/realtek/rtlwifi/core.c).
>> Repository lacks necessary blobs to fall back on 3-way merge.
>> Cannot fall back to three-way merge.
>> Patch failed at 0001 rtlwifi: Fix reusable codes in core.c
>>
>> Please rebase and resend.
>
> This one is already applied in some form. I thought I'd listed it in
> my big list of superseded patches, however I must have missed it.
Or I missed it :) But good to know, so no actions needed anymore.
--
Kalle Valo
^ permalink raw reply
* Re: [PATCH] netlink: don't send NETLINK_URELEASE for unbound sockets
From: Herbert Xu @ 2016-04-16 6:30 UTC (permalink / raw)
To: Johannes Berg; +Cc: netdev, dmitrijs.ivanovs, linux-wireless
In-Reply-To: <1460014298-30293-1-git-send-email-johannes@sipsolutions.net>
Johannes Berg <johannes@sipsolutions.net> wrote:
>
> diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
> index 215fc08c02ab..330ebd600f25 100644
> --- a/net/netlink/af_netlink.c
> +++ b/net/netlink/af_netlink.c
> @@ -688,7 +688,7 @@ static int netlink_release(struct socket *sock)
>
> skb_queue_purge(&sk->sk_write_queue);
>
> - if (nlk->portid) {
> + if (nlk->portid && nlk->bound) {
Any reason why we're still testing portid at all? Isn't testing
bound enough?
Thanks,
--
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
^ permalink raw reply
* Re: [PATCH net-next v3 1/2] rtnetlink: add new RTM_GETSTATS message to query stats
From: Thomas Graf @ 2016-04-16 7:49 UTC (permalink / raw)
To: Roopa Prabhu; +Cc: netdev, jhs, davem
In-Reply-To: <1460777293-39474-2-git-send-email-roopa@cumulusnetworks.com>
On 04/15/16 at 08:28pm, Roopa Prabhu wrote:
> +static u16 rtnl_stats_calcit(struct sk_buff *skb, struct nlmsghdr *nlh)
> +{
> + struct net *net = sock_net(skb->sk);
> + struct net_device *dev;
> + u16 min_ifinfo_dump_size = 0;
> + struct if_stats_msg *ifsm;
> + u32 filter_mask;
> +
> + ifsm = nlmsg_data(nlh);
> + filter_mask = ifsm->filter_mask;
> +
> + /* traverse the list of net devices and compute the minimum
> + * buffer size based upon the filter mask.
> + */
> + list_for_each_entry(dev, &net->dev_base_head, dev_list) {
> + min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size,
> + if_nlmsg_stats_size(dev,
> + filter_mask));
> + }
Iterating over all net_devices in the namespace is quite an expensive
operation and it would now be done twice.
I understand that this code is taken over from rtnl_calcit() but there
the cost is at least only paid if ext_filter_mask is actually set and
the user opts into additional statistics.
I wonder if we can reduce the cost for the stats interface as its
purpose is to be minimal cost. I suggest we only add the loop once we
have an extension which actually depends on it. We can then try and
figure out to not require it.
^ permalink raw reply
* Re: [PATCH net-next v3 RFC 2/2] ipv6: add support for stats via RTM_GETSTATS
From: Thomas Graf @ 2016-04-16 7:55 UTC (permalink / raw)
To: Roopa Prabhu; +Cc: netdev, jhs, davem
In-Reply-To: <1460777293-39474-3-git-send-email-roopa@cumulusnetworks.com>
On 04/15/16 at 08:28pm, Roopa Prabhu wrote:
> +static size_t inet6_get_link_af_stats_size(const struct net_device *dev,
> + u32 filter_mask)
> +{
> + if (!(filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_INET6)))
> + return 0;
> +
> + if (!__in6_dev_get(dev))
> + return 0;
> +
> + return nla_total_size(sizeof(struct nlattr)) /* IFLA_STATS_LINK_INET6 */
> + + nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
> + + nla_total_size(ICMP6_MIB_MAX * sizeof(u64));/* IFLA_INET6_ICMP6STATS */
> +}
I think this is a good example. The above is an expensive way to
figure out whether you have at least one interface with IPv6
statistics. I'd suggest to turn this into:
if (filter_mask & IFLA_STATS_FILTER_BIT(IFLA_STATS_LINK_INET6)) {
size += nla_total_size(sizeof(struct nlattr)) /* IFLA_STATS_LINK_INET6 */
+ nla_total_size(IPSTATS_MIB_MAX * 8) /* IFLA_INET6_STATS */
+ nla_total_size(ICMP6_MIB_MAX * sizeof(u64));/* IFLA_INET6_ICMP6STATS */
}
... and put it into the main calcit function. The user has explicitly opted into
IPv6 statistics so I think it's not a waste to allocate resources for it in the
message. You could also make it depend on "disable_ipv6" to be more accurate but
I think even the above is good enough.
^ permalink raw reply
* [PATCH] carl9170: Clarify kconfig text
From: Lauri Kasanen @ 2016-04-16 8:33 UTC (permalink / raw)
To: Christian Lamparter; +Cc: Kalle Valo, linux-wireless, netdev, linux-kernel
The previous text was confusing, leading readers to think this
driver was a duplicate, and so didn't need to be enabled.
After the removal of the older staging driver, this is the only
driver in mainline for these devices.
Signed-off-by: Lauri Kasanen <cand@gmx.com>
---
drivers/net/wireless/ath/carl9170/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/ath/carl9170/Kconfig b/drivers/net/wireless/ath/carl9170/Kconfig
index 1a796e5..f3e3222 100644
--- a/drivers/net/wireless/ath/carl9170/Kconfig
+++ b/drivers/net/wireless/ath/carl9170/Kconfig
@@ -5,7 +5,7 @@ config CARL9170
select FW_LOADER
select CRC32
help
- This is another driver for the Atheros "otus" 802.11n USB devices.
+ This is the mainline driver for the Atheros "otus" 802.11n USB devices.
This driver provides more features than the original,
but it needs a special firmware (carl9170-1.fw) to do that.
--
2.6.2
^ permalink raw reply related
* Re: qdisc spin lock
From: Andrew @ 2016-04-16 8:52 UTC (permalink / raw)
To: Michael Ma, Jesper Dangaard Brouer; +Cc: netdev
In-Reply-To: <CAAmHdhwpVOCv=4Y+pb9PfGKWV0ooqnr7eC58ZYfRTtYjC35EFw@mail.gmail.com>
I think that it isn't a good solution - unless you can bind specified
host (src/dst) to specified txq. Usually traffic is spread over txqs by
src+dst IP (or even IP:port) hash which results in traffic spreading
among all mqs on device, and wrong bandwidth limiting (N*bandwidth on
multi-session load like p2p/server traffic)...
People say that the hfsc shaper has better performance, but I haven't
tested it.
01.04.2016 02:41, Michael Ma пишет:
> Thanks for the suggestion - I'll try the MQ solution out. It seems to
> be able to solve the problem well with the assumption that bandwidth
> can be statically partitioned.
>
> 2016-03-31 12:18 GMT-07:00 Jesper Dangaard Brouer <brouer@redhat.com>:
>> On Wed, 30 Mar 2016 00:20:03 -0700 Michael Ma <make0818@gmail.com> wrote:
>>
>>> I know this might be an old topic so bare with me – what we are facing
>>> is that applications are sending small packets using hundreds of
>>> threads so the contention on spin lock in __dev_xmit_skb increases the
>>> latency of dev_queue_xmit significantly. We’re building a network QoS
>>> solution to avoid interference of different applications using HTB.
>> Yes, as you have noticed with HTB there is a single qdisc lock, and
>> congestion obviously happens :-)
>>
>> It is possible with different tricks to make it scale. I believe
>> Google is using a variant of HTB, and it scales for them. They have
>> not open source their modifications to HTB (which likely also involves
>> a great deal of setup tricks).
>>
>> If your purpose it to limit traffic/bandwidth per "cloud" instance,
>> then you can just use another TC setup structure. Like using MQ and
>> assigning a HTB per MQ queue (where the MQ queues are bound to each
>> CPU/HW queue)... But you have to figure out this setup yourself...
>>
>>
>>> But in this case when some applications send massive small packets in
>>> parallel, the application to be protected will get its throughput
>>> affected (because it’s doing synchronous network communication using
>>> multiple threads and throughput is sensitive to the increased latency)
>>>
>>> Here is the profiling from perf:
>>>
>>> - 67.57% iperf [kernel.kallsyms] [k] _spin_lock
>>> - 99.94% dev_queue_xmit
>>> - 96.91% _spin_lock
>>> - 2.62% __qdisc_run
>>> - 98.98% sch_direct_xmit
>>> - 99.98% _spin_lock
>>>
>>> As far as I understand the design of TC is to simplify locking schema
>>> and minimize the work in __qdisc_run so that throughput won’t be
>>> affected, especially with large packets. However if the scenario is
>>> that multiple classes in the queueing discipline only have the shaping
>>> limit, there isn’t really a necessary correlation between different
>>> classes. The only synchronization point should be when the packet is
>>> dequeued from the qdisc queue and enqueued to the transmit queue of
>>> the device. My question is – is it worth investing on avoiding the
>>> locking contention by partitioning the queue/lock so that this
>>> scenario is addressed with relatively smaller latency?
>> Yes, there is a lot go gain, but it is not easy ;-)
>>
>>> I must have oversimplified a lot of details since I’m not familiar
>>> with the TC implementation at this point – just want to get your input
>>> in terms of whether this is a worthwhile effort or there is something
>>> fundamental that I’m not aware of. If this is just a matter of quite
>>> some additional work, would also appreciate helping to outline the
>>> required work here.
>>>
>>> Also would appreciate if there is any information about the latest
>>> status of this work http://www.ijcset.com/docs/IJCSET13-04-04-113.pdf
>> This article seems to be very low quality... spelling errors, only 5
>> pages, no real code, etc.
>>
>> --
>> Best regards,
>> Jesper Dangaard Brouer
>> MSc.CS, Principal Kernel Engineer at Red Hat
>> Author of http://www.iptv-analyzer.org
>> LinkedIn: http://www.linkedin.com/in/brouer
^ permalink raw reply
* [PATCH net-next 0/6] nfp: cleanups and improvements
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
Hi!
Main purpose of this set is to get rid of doing potentially long
mdelay()s but it also contains some trivial changes I've accumulated.
First two patches fix harmless copy-paste errors, next two clean up
the documentation and remove unused defines. Patch 5 clarifies the
interpretation of RX descriptor fields. Patch 6, by far the biggest,
adds ability to perform FW reconfig asynchronously thanks to which
we can stop using mdelay().
Jakub Kicinski (6):
nfp: check the right pointer for errors
nfp: remove unnecessary static
nfp: correct names of constants in comments
nfp: remove unused suspicious mask defines
nfp: remove buggy RX buffer length validation
nfp: add async reconfiguration mechanism
drivers/net/ethernet/netronome/nfp/nfp_net.h | 12 +-
.../net/ethernet/netronome/nfp/nfp_net_common.c | 198 +++++++++++++++++----
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 10 +-
.../net/ethernet/netronome/nfp/nfp_net_debugfs.c | 4 +-
4 files changed, 176 insertions(+), 48 deletions(-)
--
1.9.1
^ permalink raw reply
* [PATCH net-next 1/6] nfp: check the right pointer for errors
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
Correct checking error condition on wrong pointer -
copy/paste mistake most likely.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index f86a1f13d27b..d77ae4d0e4dc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -200,7 +200,7 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
/* Create queue debugging sub-tree */
queues = debugfs_create_dir("queue", nn->debugfs_dir);
- if (IS_ERR_OR_NULL(nn->debugfs_dir))
+ if (IS_ERR_OR_NULL(queues))
return;
rx = debugfs_create_dir("rx", queues);
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 3/6] nfp: correct names of constants in comments
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
Documentation in comments lacks CFG in some names.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 8692003aeed8..3ec950555892 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -152,9 +152,9 @@
* @NFP_NET_CFG_VERSION: Firmware version number
* @NFP_NET_CFG_STS: Status
* @NFP_NET_CFG_CAP: Capabilities (same bits as @NFP_NET_CFG_CTRL)
- * @NFP_NET_MAX_TXRINGS: Maximum number of TX rings
- * @NFP_NET_MAX_RXRINGS: Maximum number of RX rings
- * @NFP_NET_MAX_MTU: Maximum support MTU
+ * @NFP_NET_CFG_MAX_TXRINGS: Maximum number of TX rings
+ * @NFP_NET_CFG_MAX_RXRINGS: Maximum number of RX rings
+ * @NFP_NET_CFG_MAX_MTU: Maximum support MTU
* @NFP_NET_CFG_START_TXQ: Start Queue Control Queue to use for TX (PF only)
* @NFP_NET_CFG_START_RXQ: Start Queue Control Queue to use for RX (PF only)
*
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 4/6] nfp: remove unused suspicious mask defines
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
NFP_NET_RXR_MASK sounds like a mask which could be used on
NFP_NET_CFG_RXRS_ENABLE register but its value is quite
strange. In fact there are no users of this define so let's
just remove it. Same for TX rings.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h | 4 ----
1 file changed, 4 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 3ec950555892..ad6c4e31cedd 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -81,14 +81,10 @@
/**
* @NFP_NET_TXR_MAX: Maximum number of TX rings
- * @NFP_NET_TXR_MASK: Mask for TX rings
* @NFP_NET_RXR_MAX: Maximum number of RX rings
- * @NFP_NET_RXR_MASK: Mask for RX rings
*/
#define NFP_NET_TXR_MAX 64
-#define NFP_NET_TXR_MASK (NFP_NET_TXR_MAX - 1)
#define NFP_NET_RXR_MAX 64
-#define NFP_NET_RXR_MASK (NFP_NET_RXR_MAX - 1)
/**
* Read/Write config words (0x0000 - 0x002c)
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 5/6] nfp: remove buggy RX buffer length validation
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
Meaning of data_len and meta_len RX WB descriptor fields is
slightly confusing. Add a comment with a diagram clarifying
the layout. Also remove the buffer length validation:
(a) it's imprecise for static rx-offsets; (b) if firmware
is buggy enough to DMA past the end of the buffer
WARN_ON_ONCE() doesn't seem like a strong enough response.
skb_put() will do the checking for us anyway.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
.../net/ethernet/netronome/nfp/nfp_net_common.c | 26 ++++++++++++----------
1 file changed, 14 insertions(+), 12 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 0bdff390c958..5235e86eb684 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1298,23 +1298,25 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr);
+ /* < meta_len >
+ * <-- [rx_offset] -->
+ * ---------------------------------------------------------
+ * | [XX] | metadata | packet | XXXX |
+ * ---------------------------------------------------------
+ * <---------------- data_len --------------->
+ *
+ * The rx_offset is fixed for all packets, the meta_len can vary
+ * on a packet by packet basis. If rx_offset is set to zero
+ * (_RX_OFFSET_DYNAMIC) metadata starts at the beginning of the
+ * buffer and is immediately followed by the packet (no [XX]).
+ */
meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
data_len = le16_to_cpu(rxd->rxd.data_len);
- if (WARN_ON_ONCE(data_len > nn->fl_bufsz)) {
- dev_kfree_skb_any(skb);
- continue;
- }
-
- if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC) {
- /* The packet data starts after the metadata */
+ if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
skb_reserve(skb, meta_len);
- } else {
- /* The packet data starts at a fixed offset */
+ else
skb_reserve(skb, nn->rx_offset);
- }
-
- /* Adjust the SKB for the dynamic meta data pre-pended */
skb_put(skb, data_len - meta_len);
nfp_net_set_hash(nn->netdev, skb, rxd);
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 6/6] nfp: add async reconfiguration mechanism
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
Some callers of nfp_net_reconfig() are in atomic context so
we used to busy wait for commands to complete. In worst case
scenario that means locking up a core for up to 5 seconds
when a command times out. Let's add a timer-based mechanism
of asynchronously checking whether reconfiguration completed
successfully for atomic callers to use. Non-atomic callers
can now just sleep.
The approach taken is quite simple because (1) synchronous
reconfigurations always happen under RTNL (or before device
is registered); (2) we can coalesce pending reconfigs.
There is no need for request queues; a timer which eventually
takes a look at the reconfiguration result to report errors is
good enough.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net.h | 12 +-
.../net/ethernet/netronome/nfp/nfp_net_common.c | 172 ++++++++++++++++++---
2 files changed, 157 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 3d53fcf323eb..e744acc18ef4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -59,8 +59,8 @@
netdev_warn((nn)->netdev, fmt, ## args); \
} while (0)
-/* Max time to wait for NFP to respond on updates (in ms) */
-#define NFP_NET_POLL_TIMEOUT 5000
+/* Max time to wait for NFP to respond on updates (in seconds) */
+#define NFP_NET_POLL_TIMEOUT 5
/* Bar allocation */
#define NFP_NET_CRTL_BAR 0
@@ -447,6 +447,10 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
* @shared_name: Name for shared interrupt
* @me_freq_mhz: ME clock_freq (MHz)
* @reconfig_lock: Protects HW reconfiguration request regs/machinery
+ * @reconfig_posted: Pending reconfig bits coming from async sources
+ * @reconfig_timer_active: Timer for reading reconfiguration results is pending
+ * @reconfig_sync_present: Some thread is performing synchronous reconfig
+ * @reconfig_timer: Timer for async reading of reconfig results
* @link_up: Is the link up?
* @link_status_lock: Protects @link_up and ensures atomicity with BAR reading
* @rx_coalesce_usecs: RX interrupt moderation usecs delay parameter
@@ -531,6 +535,10 @@ struct nfp_net {
spinlock_t link_status_lock;
spinlock_t reconfig_lock;
+ u32 reconfig_posted;
+ bool reconfig_timer_active;
+ bool reconfig_sync_present;
+ struct timer_list reconfig_timer;
u32 rx_coalesce_usecs;
u32 rx_coalesce_max_frames;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 5235e86eb684..fa47c14c743a 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -80,6 +80,116 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
put_unaligned_le32(reg, fw_ver);
}
+/* Firmware reconfig
+ *
+ * Firmware reconfig may take a while so we have two versions of it -
+ * synchronous and asynchronous (posted). All synchronous callers are holding
+ * RTNL so we don't have to worry about serializing them.
+ */
+static void nfp_net_reconfig_start(struct nfp_net *nn, u32 update)
+{
+ nn_writel(nn, NFP_NET_CFG_UPDATE, update);
+ /* ensure update is written before pinging HW */
+ nn_pci_flush(nn);
+ nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
+}
+
+/* Pass 0 as update to run posted reconfigs. */
+static void nfp_net_reconfig_start_async(struct nfp_net *nn, u32 update)
+{
+ update |= nn->reconfig_posted;
+ nn->reconfig_posted = 0;
+
+ nfp_net_reconfig_start(nn, update);
+
+ nn->reconfig_timer_active = true;
+ mod_timer(&nn->reconfig_timer, jiffies + NFP_NET_POLL_TIMEOUT * HZ);
+}
+
+static bool nfp_net_reconfig_check_done(struct nfp_net *nn, bool last_check)
+{
+ u32 reg;
+
+ reg = nn_readl(nn, NFP_NET_CFG_UPDATE);
+ if (reg == 0)
+ return true;
+ if (reg & NFP_NET_CFG_UPDATE_ERR) {
+ nn_err(nn, "Reconfig error: 0x%08x\n", reg);
+ return true;
+ } else if (last_check) {
+ nn_err(nn, "Reconfig timeout: 0x%08x\n", reg);
+ return true;
+ }
+
+ return false;
+}
+
+static int nfp_net_reconfig_wait(struct nfp_net *nn, unsigned long deadline)
+{
+ bool timed_out = false;
+
+ /* Poll update field, waiting for NFP to ack the config */
+ while (!nfp_net_reconfig_check_done(nn, timed_out)) {
+ msleep(1);
+ timed_out = time_is_before_eq_jiffies(deadline);
+ }
+
+ if (nn_readl(nn, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
+ return -EIO;
+
+ return timed_out ? -EIO : 0;
+}
+
+static void nfp_net_reconfig_timer(unsigned long data)
+{
+ struct nfp_net *nn = (void *)data;
+
+ spin_lock_bh(&nn->reconfig_lock);
+
+ nn->reconfig_timer_active = false;
+
+ /* If sync caller is present it will take over from us */
+ if (nn->reconfig_sync_present)
+ goto done;
+
+ /* Read reconfig status and report errors */
+ nfp_net_reconfig_check_done(nn, true);
+
+ if (nn->reconfig_posted)
+ nfp_net_reconfig_start_async(nn, 0);
+done:
+ spin_unlock_bh(&nn->reconfig_lock);
+}
+
+/**
+ * nfp_net_reconfig_post() - Post async reconfig request
+ * @nn: NFP Net device to reconfigure
+ * @update: The value for the update field in the BAR config
+ *
+ * Record FW reconfiguration request. Reconfiguration will be kicked off
+ * whenever reconfiguration machinery is idle. Multiple requests can be
+ * merged together!
+ */
+static void nfp_net_reconfig_post(struct nfp_net *nn, u32 update)
+{
+ spin_lock_bh(&nn->reconfig_lock);
+
+ /* Sync caller will kick off async reconf when it's done, just post */
+ if (nn->reconfig_sync_present) {
+ nn->reconfig_posted |= update;
+ goto done;
+ }
+
+ /* Opportunistically check if the previous command is done */
+ if (!nn->reconfig_timer_active ||
+ nfp_net_reconfig_check_done(nn, false))
+ nfp_net_reconfig_start_async(nn, update);
+ else
+ nn->reconfig_posted |= update;
+done:
+ spin_unlock_bh(&nn->reconfig_lock);
+}
+
/**
* nfp_net_reconfig() - Reconfigure the firmware
* @nn: NFP Net device to reconfigure
@@ -93,35 +203,45 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
*/
int nfp_net_reconfig(struct nfp_net *nn, u32 update)
{
- int cnt, ret = 0;
- u32 new;
+ bool cancelled_timer = false;
+ u32 pre_posted_requests;
+ int ret;
spin_lock_bh(&nn->reconfig_lock);
- nn_writel(nn, NFP_NET_CFG_UPDATE, update);
- /* ensure update is written before pinging HW */
- nn_pci_flush(nn);
- nfp_qcp_wr_ptr_add(nn->qcp_cfg, 1);
+ nn->reconfig_sync_present = true;
- /* Poll update field, waiting for NFP to ack the config */
- for (cnt = 0; ; cnt++) {
- new = nn_readl(nn, NFP_NET_CFG_UPDATE);
- if (new == 0)
- break;
- if (new & NFP_NET_CFG_UPDATE_ERR) {
- nn_err(nn, "Reconfig error: 0x%08x\n", new);
- ret = -EIO;
- break;
- } else if (cnt >= NFP_NET_POLL_TIMEOUT) {
- nn_err(nn, "Reconfig timeout for 0x%08x after %dms\n",
- update, cnt);
- ret = -EIO;
- break;
- }
- mdelay(1);
+ if (nn->reconfig_timer_active) {
+ del_timer(&nn->reconfig_timer);
+ nn->reconfig_timer_active = false;
+ cancelled_timer = true;
+ }
+ pre_posted_requests = nn->reconfig_posted;
+ nn->reconfig_posted = 0;
+
+ spin_unlock_bh(&nn->reconfig_lock);
+
+ if (cancelled_timer)
+ nfp_net_reconfig_wait(nn, nn->reconfig_timer.expires);
+
+ /* Run the posted reconfigs which were issued before we started */
+ if (pre_posted_requests) {
+ nfp_net_reconfig_start(nn, pre_posted_requests);
+ nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
}
+ nfp_net_reconfig_start(nn, update);
+ ret = nfp_net_reconfig_wait(nn, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
+
+ spin_lock_bh(&nn->reconfig_lock);
+
+ if (nn->reconfig_posted)
+ nfp_net_reconfig_start_async(nn, 0);
+
+ nn->reconfig_sync_present = false;
+
spin_unlock_bh(&nn->reconfig_lock);
+
return ret;
}
@@ -2096,8 +2216,7 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
return;
nn_writel(nn, NFP_NET_CFG_CTRL, new_ctrl);
- if (nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN))
- return;
+ nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_GEN);
nn->ctrl = new_ctrl;
}
@@ -2405,7 +2524,7 @@ static void nfp_net_set_vxlan_port(struct nfp_net *nn, int idx, __be16 port)
be16_to_cpu(nn->vxlan_ports[i + 1]) << 16 |
be16_to_cpu(nn->vxlan_ports[i]));
- nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_VXLAN);
+ nfp_net_reconfig_post(nn, NFP_NET_CFG_UPDATE_VXLAN);
}
/**
@@ -2551,6 +2670,9 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
spin_lock_init(&nn->reconfig_lock);
spin_lock_init(&nn->link_status_lock);
+ setup_timer(&nn->reconfig_timer,
+ nfp_net_reconfig_timer, (unsigned long)nn);
+
return nn;
}
--
1.9.1
^ permalink raw reply related
* [PATCH net-next 2/6] nfp: remove unnecessary static
From: Jakub Kicinski @ 2016-04-16 10:25 UTC (permalink / raw)
To: netdev; +Cc: Jakub Kicinski
In-Reply-To: <1460802354-14248-1-git-send-email-jakub.kicinski@netronome.com>
There is no reason for those local variables to be static.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index d77ae4d0e4dc..f7c9a5bc4aa3 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -187,7 +187,7 @@ static const struct file_operations nfp_tx_q_fops = {
void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
{
- static struct dentry *queues, *tx, *rx;
+ struct dentry *queues, *tx, *rx;
char int_name[16];
int i;
--
1.9.1
^ permalink raw reply related
* Grant Donation
From: Julie Leach @ 2016-04-16 11:51 UTC (permalink / raw)
--
You have a cash grant donation of $2,500.000.00 USD, I am Julie Leach;Won $310.5 million Powerball jackpot winner. To verify the genuineness of this email and my winnings, please you can read more about me by viewing the below links:
http://www.powerball.com/powerball/winners/2015/MI_Leach_093015.shtml
For more details, kindly reply to this message for complete information.
Julie Leach.
^ permalink raw reply
* [PATCH v2] carl9170: Clarify kconfig text
From: Lauri Kasanen @ 2016-04-16 14:18 UTC (permalink / raw)
To: Christian Lamparter
Cc: Kalle Valo, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
netdev-u79uwXL29TY76Z2rM5mHXA,
linux-kernel-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1466768.7dLuLFSDol@debian64>
The previous text was confusing, leading readers to think this
driver was a duplicate, and so didn't need to be enabled.
After the removal of the older staging driver, this is the only
driver in mainline for these devices.
Signed-off-by: Lauri Kasanen <cand-KK0ffGbhmjU@public.gmane.org>
---
v2: Remove the mention of the previous driver, suggested by Christian.
drivers/net/wireless/ath/carl9170/Kconfig | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/drivers/net/wireless/ath/carl9170/Kconfig b/drivers/net/wireless/ath/carl9170/Kconfig
index 1a796e5..2e34bae 100644
--- a/drivers/net/wireless/ath/carl9170/Kconfig
+++ b/drivers/net/wireless/ath/carl9170/Kconfig
@@ -5,12 +5,10 @@ config CARL9170
select FW_LOADER
select CRC32
help
- This is another driver for the Atheros "otus" 802.11n USB devices.
+ This is the mainline driver for the Atheros "otus" 802.11n USB devices.
- This driver provides more features than the original,
- but it needs a special firmware (carl9170-1.fw) to do that.
^ permalink raw reply related
* Re: [PATCH] net: ipv6: Do not fix up linklocal and loopback addresses
From: Sergei Shtylyov @ 2016-04-16 14:31 UTC (permalink / raw)
To: Mike Manning, netdev
In-Reply-To: <5711ADD1.2030904@brocade.com>
Hello.
On 4/16/2016 6:13 AM, Mike Manning wrote:
> f1705ec197e7 added the option to retain user configured addresses on an
> admin down. A comment to one of the later revisions suggested using the
> IFA_F_PERMANENT flag rather than adding a user_managed boolean to the
> ifaddr struct. A side effect of this change is that link local and
> loopback addresses are also retained which is not part of the objective
> of f1705ec197e7. Add check so that these addresses are not fixed up,
> given that a related fix 70af921db6f8 ensures that they are not kept in
scripts/checkpatch.pl now enforces a certain commit citing style: <12-digit
SHA1> ("<commit summary>").
> the first place, otherwise this incorrect fixup triggers a crash in fib6.
>
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Signed-off-by: Mike Manning <mmanning@brocade.com>
[...]
MBR, Sergei
^ permalink raw reply
* Re: [PATCH v2] carl9170: Clarify kconfig text
From: Christian Lamparter @ 2016-04-16 15:32 UTC (permalink / raw)
To: Lauri Kasanen; +Cc: Kalle Valo, linux-wireless, netdev, linux-kernel
In-Reply-To: <20160416171856.1769c0e16bcc75372407e06f@gmx.com>
On Saturday, April 16, 2016 05:18:56 PM Lauri Kasanen wrote:
> The previous text was confusing, leading readers to think this
> driver was a duplicate, and so didn't need to be enabled.
>
> After the removal of the older staging driver, this is the only
> driver in mainline for these devices.
>
> Signed-off-by: Lauri Kasanen <cand@gmx.com>
Acked-by: Christian Lamparter <chunkeey@googlemail.com>
> ---
> v2: Remove the mention of the previous driver, suggested by Christian.
Thanks!
> drivers/net/wireless/ath/carl9170/Kconfig | 8 +++-----
> 1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/carl9170/Kconfig b/drivers/net/wireless/ath/carl9170/Kconfig
> index 1a796e5..2e34bae 100644
> --- a/drivers/net/wireless/ath/carl9170/Kconfig
> +++ b/drivers/net/wireless/ath/carl9170/Kconfig
> @@ -5,12 +5,10 @@ config CARL9170
> select FW_LOADER
> select CRC32
> help
> - This is another driver for the Atheros "otus" 802.11n USB devices.
> + This is the mainline driver for the Atheros "otus" 802.11n USB devices.
>
> - This driver provides more features than the original,
> - but it needs a special firmware (carl9170-1.fw) to do that.
> -
> - The firmware can be downloaded from our wiki here:
> + It needs a special firmware (carl9170-1.fw), which can be downloaded
> + from our wiki here:
> <http://wireless.kernel.org/en/users/Drivers/carl9170>
>
> If you choose to build a module, it'll be called carl9170.
>
^ permalink raw reply
* Re: WARNING: CPU: 1 PID: 2485 at drivers/net/wireless/intel/iwlwifi/pcie/trans.c:1752 iwl_trans_pcie_grab_nic_access+0x110/0x120 [iwlwifi]
From: Borislav Petkov @ 2016-04-16 15:43 UTC (permalink / raw)
To: Grumbach, Emmanuel
Cc: linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, linuxwifi,
Dreyfuss, Haim, Coelho, Luciano, Berg, Johannes,
kvalo-sgV2jX0FEOL9JmXXK+q4OQ@public.gmane.org,
netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, Sharon, Sara
In-Reply-To: <1460693762.2648.5.camel-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
On Fri, Apr 15, 2016 at 04:16:02AM +0000, Grumbach, Emmanuel wrote:
> [1] https://git.kernel.org/cgit/linux/kernel/git/iwlwifi/backport-iwlwi
> fi.git/
It is very strange to pull from this repo, git fetch is doing something
for a while now without any forward progress.
In any case, 4.5 is bad too, testing 4.4 now. I'm travelling currently
so the whole bisection deal will take a while but I'll get to it
eventually.
Thanks for taking a look.
--
Regards/Gruss,
Boris.
ECO tip #101: Trim your mails when you reply. Srsly.
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] net: ipv6: Do not fix up linklocal and loopback addresses
From: David Ahern @ 2016-04-16 18:26 UTC (permalink / raw)
To: Mike Manning, netdev
In-Reply-To: <5711ADD1.2030904@brocade.com>
On 4/15/16 9:13 PM, Mike Manning wrote:
> f1705ec197e7 added the option to retain user configured addresses on an
> admin down. A comment to one of the later revisions suggested using the
> IFA_F_PERMANENT flag rather than adding a user_managed boolean to the
> ifaddr struct. A side effect of this change is that link local and
> loopback addresses are also retained which is not part of the objective
> of f1705ec197e7. Add check so that these addresses are not fixed up,
> given that a related fix 70af921db6f8 ensures that they are not kept in
> the first place, otherwise this incorrect fixup triggers a crash in fib6.
oops in fib6_del?
>
> Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
> Signed-off-by: Mike Manning <mmanning@brocade.com>
> ---
for the change
Acked-by: David Ahern <dsa@cumulusnetworks.com>
^ permalink raw reply
* Re: WARNING: CPU: 1 PID: 2485 at drivers/net/wireless/intel/iwlwifi/pcie/trans.c:1752 iwl_trans_pcie_grab_nic_access+0x110/0x120 [iwlwifi]
From: Grumbach, Emmanuel @ 2016-04-16 19:53 UTC (permalink / raw)
To: bp@alien8.de
Cc: linux-kernel@vger.kernel.org, linuxwifi, Dreyfuss, Haim,
Coelho, Luciano, Berg, Johannes, kvalo@codeaurora.org,
netdev@vger.kernel.org, Sharon, Sara,
linux-wireless@vger.kernel.org
In-Reply-To: <20160416154348.GA2069@cz.tnic>
On Sat, 2016-04-16 at 17:43 +0200, Borislav Petkov wrote:
> On Fri, Apr 15, 2016 at 04:16:02AM +0000, Grumbach, Emmanuel wrote:
> > [1]
> > https://git.kernel.org/cgit/linux/kernel/git/iwlwifi/backport-iwlwi
> > fi.git/
>
> It is very strange to pull from this repo, git fetch is doing
> something
> for a while now without any forward progress.
This is not surprising at all. This tree is not a kernel tree, but
rather a backport tree. It doesn't contain anything besides the Wifi
stack and the backport code. It has no common commit with the Linux
kernel. I understand that you fetched this tree from a kernel tree but
that's not a good idea. Git will try to find a common ancestor but that
will fail... after a huge number of attempts. You'd better clone it.
> In any case, 4.5 is bad too, testing 4.4 now.
Here you go.
> I'm travelling currently
> so the whole bisection deal will take a while but I'll get to it
> eventually.
I'll be glad to see something coming out of this, but I can't say I am
very optimistic.
>
> Thanks for taking a look.
>
^ permalink raw reply
* [PATCH] netfilter: ctnetlink: add more #ifdef around unused code
From: Arnd Bergmann @ 2016-04-16 20:17 UTC (permalink / raw)
To: Pablo Neira Ayuso
Cc: Arnd Bergmann, Patrick McHardy, Jozsef Kadlecsik, David S. Miller,
Daniel Borkmann, Ken-ichirou MATSUZAWA, netfilter-devel, coreteam,
netdev, linux-kernel
A recent patch removed many 'inline' annotations for static
functions in this file, which has caused warnings for functions
that are not used in a given configuration, in particular when
CONFIG_NF_CONNTRACK_EVENTS is disabled:
nf_conntrack_netlink.c:572:15: 'ctnetlink_timestamp_size' defined but not used
nf_conntrack_netlink.c:546:15: 'ctnetlink_acct_size' defined but not used
nf_conntrack_netlink.c:339:12: 'ctnetlink_label_size' defined but not used
I first tried to replace some of the existing #ifdefs with nicer
'if (IS_ENABLED())' checks, but ran into several other problems
with that, so this patch adds even more #ifdef conditionals to
avoid the remaining warnings. Another option would be to put
'__maybe_unused' annotations in place of the previous 'inline'
keyword.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: 4054ff45454a ("netfilter: ctnetlink: remove unnecessary inlining")
---
net/netfilter/nf_conntrack_netlink.c | 26 +++++++++++++++-----------
1 file changed, 15 insertions(+), 11 deletions(-)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index caa4efe5930b..f893012986c7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -336,6 +336,7 @@ nla_put_failure:
#endif
#ifdef CONFIG_NF_CONNTRACK_LABELS
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
static int ctnetlink_label_size(const struct nf_conn *ct)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
@@ -344,6 +345,7 @@ static int ctnetlink_label_size(const struct nf_conn *ct)
return 0;
return nla_total_size(labels->words * sizeof(long));
}
+#endif
static int
ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
@@ -526,6 +528,7 @@ nla_put_failure:
return -1;
}
+#if defined(CONFIG_NF_CONNTRACK_EVENTS) || defined(CONFIG_NETFILTER_NETLINK_GLUE_CT)
static size_t ctnetlink_proto_size(const struct nf_conn *ct)
{
struct nf_conntrack_l3proto *l3proto;
@@ -543,16 +546,6 @@ static size_t ctnetlink_proto_size(const struct nf_conn *ct)
return len;
}
-static size_t ctnetlink_acct_size(const struct nf_conn *ct)
-{
- if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
- return 0;
- return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
- + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
- + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
- ;
-}
-
static int ctnetlink_secctx_size(const struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_SECMARK
@@ -568,6 +561,18 @@ static int ctnetlink_secctx_size(const struct nf_conn *ct)
return 0;
#endif
}
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static size_t ctnetlink_acct_size(const struct nf_conn *ct)
+{
+ if (!nf_ct_ext_exist(ct, NF_CT_EXT_ACCT))
+ return 0;
+ return 2 * nla_total_size(0) /* CTA_COUNTERS_ORIG|REPL */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_PACKETS */
+ + 2 * nla_total_size(sizeof(uint64_t)) /* CTA_COUNTERS_BYTES */
+ ;
+}
static size_t ctnetlink_timestamp_size(const struct nf_conn *ct)
{
@@ -580,7 +585,6 @@ static size_t ctnetlink_timestamp_size(const struct nf_conn *ct)
#endif
}
-#ifdef CONFIG_NF_CONNTRACK_EVENTS
static size_t ctnetlink_nlmsg_size(const struct nf_conn *ct)
{
return NLMSG_ALIGN(sizeof(struct nfgenmsg))
--
2.7.0
^ permalink raw reply related
* [PATCH] devlink: fix devlink_sb_register prototype
From: Arnd Bergmann @ 2016-04-16 20:27 UTC (permalink / raw)
To: Jiri Pirko, David S. Miller
Cc: Arnd Bergmann, Ido Schimmel, netdev, linux-kernel
The devlink shared buffer interface contains two different
prototypes for devlink_sb_register, and the one that is
used when NET_DEVLINK is disabled does not work:
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c: In function 'mlxsw_sp_buffers_init':
drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c:547:9: error: too many arguments to function 'devlink_sb_register'
return devlink_sb_register(priv_to_devlink(mlxsw_sp->core), 0,
This makes the two prototypes have the same argument list.
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Fixes: bf7974710a40 ("devlink: add shared buffer configuration")
---
include/net/devlink.h | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/include/net/devlink.h b/include/net/devlink.h
index be64218e0254..1d45b61cb320 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -184,7 +184,9 @@ static inline void devlink_port_split_set(struct devlink_port *devlink_port,
static inline int devlink_sb_register(struct devlink *devlink,
unsigned int sb_index, u32 size,
u16 ingress_pools_count,
- u16 egress_pools_count, u16 tc_count)
+ u16 egress_pools_count,
+ u16 ingress_tc_count,
+ u16 egress_tc_count)
{
return 0;
}
--
2.7.0
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox