* [RFC 05/14] nfp: very basic hstat support
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
In-Reply-To: <20190128234507.32028-1-jakub.kicinski@netronome.com>
Expose basic vNIC device statistics via hstat.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/Makefile | 1 +
.../net/ethernet/netronome/nfp/nfp_hstat.c | 70 +++++++++++++++++++
drivers/net/ethernet/netronome/nfp/nfp_net.h | 3 +
.../ethernet/netronome/nfp/nfp_net_common.c | 1 +
4 files changed, 75 insertions(+)
create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_hstat.c
diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile
index 47c708f08ade..4721abe9bfbf 100644
--- a/drivers/net/ethernet/netronome/nfp/Makefile
+++ b/drivers/net/ethernet/netronome/nfp/Makefile
@@ -19,6 +19,7 @@ nfp-objs := \
nfp_app.o \
nfp_app_nic.o \
nfp_devlink.o \
+ nfp_hstat.o \
nfp_hwmon.o \
nfp_main.o \
nfp_net_common.o \
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_hstat.c b/drivers/net/ethernet/netronome/nfp/nfp_hstat.c
new file mode 100644
index 000000000000..9480d3b6caa5
--- /dev/null
+++ b/drivers/net/ethernet/netronome/nfp/nfp_hstat.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <net/hstats.h>
+
+#include "nfp_net.h"
+
+/* NFD per-vNIC stats */
+static int
+nfp_hstat_vnic_nfd_basic_get_rx(struct net_device *netdev,
+ struct rtnl_hstat_req *req,
+ const struct rtnl_hstat_group *grp)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ rtnl_hstat_dump(req, IFLA_HSTATS_STAT_LINUX_PKTS,
+ nn_readq(nn, NFP_NET_CFG_STATS_RX_FRAMES));
+ rtnl_hstat_dump(req, IFLA_HSTATS_STAT_LINUX_BYTES,
+ nn_readq(nn, NFP_NET_CFG_STATS_RX_OCTETS));
+ return 0;
+}
+
+static const struct rtnl_hstat_group nfp_hstat_vnic_nfd_rx = {
+ .qualifiers = {
+ RTNL_HSTATS_QUALS_BASIC(DEV, RX),
+ },
+
+ .get_stats = nfp_hstat_vnic_nfd_basic_get_rx,
+ .stats = {
+ [0] = RTNL_HSTATS_STAT_LINUX_PKTS_BIT |
+ RTNL_HSTATS_STAT_LINUX_BYTES_BIT,
+ },
+ .stats_cnt = 2,
+};
+
+static int
+nfp_hstat_vnic_nfd_basic_get_tx(struct net_device *netdev,
+ struct rtnl_hstat_req *req,
+ const struct rtnl_hstat_group *grp)
+{
+ struct nfp_net *nn = netdev_priv(netdev);
+
+ rtnl_hstat_dump(req, IFLA_HSTATS_STAT_LINUX_PKTS,
+ nn_readq(nn, NFP_NET_CFG_STATS_TX_FRAMES));
+ rtnl_hstat_dump(req, IFLA_HSTATS_STAT_LINUX_BYTES,
+ nn_readq(nn, NFP_NET_CFG_STATS_TX_OCTETS));
+ return 0;
+}
+
+static const struct rtnl_hstat_group nfp_hstat_vnic_nfd_tx = {
+ .qualifiers = {
+ RTNL_HSTATS_QUALS_BASIC(DEV, TX),
+ },
+
+ .get_stats = nfp_hstat_vnic_nfd_basic_get_tx,
+ .stats = {
+ [0] = RTNL_HSTATS_STAT_LINUX_PKTS_BIT |
+ RTNL_HSTATS_STAT_LINUX_BYTES_BIT,
+ },
+ .stats_cnt = 2,
+};
+
+int nfp_net_hstat_get_groups(const struct net_device *netdev,
+ struct rtnl_hstat_req *req)
+{
+ rtnl_hstat_add_grp(req, &nfp_hstat_vnic_nfd_rx);
+ rtnl_hstat_add_grp(req, &nfp_hstat_vnic_nfd_tx);
+
+ return 0;
+}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 93de25b39bc1..08396a23edeb 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -105,6 +105,7 @@ struct nfp_eth_table_port;
struct nfp_net;
struct nfp_net_r_vector;
struct nfp_port;
+struct rtnl_hstat_req;
/* Convenience macro for wrapping descriptor index on ring size */
#define D_IDX(ring, idx) ((idx) & ((ring)->cnt - 1))
@@ -910,4 +911,6 @@ static inline void nfp_net_debugfs_dir_clean(struct dentry **dir)
}
#endif /* CONFIG_NFP_DEBUG */
+int nfp_net_hstat_get_groups(const struct net_device *dev,
+ struct rtnl_hstat_req *req);
#endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 7d2d4241498f..87ebfc3f0471 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -3531,6 +3531,7 @@ const struct net_device_ops nfp_net_netdev_ops = {
.ndo_udp_tunnel_add = nfp_net_add_vxlan_port,
.ndo_udp_tunnel_del = nfp_net_del_vxlan_port,
.ndo_bpf = nfp_net_xdp,
+ .ndo_hstat_get_groups = nfp_net_hstat_get_groups,
};
/**
--
2.19.2
^ permalink raw reply related
* [RFC 04/14] net: hstats: allow hierarchies to be built
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
In-Reply-To: <20190128234507.32028-1-jakub.kicinski@netronome.com>
Allow groups to have other groups attached as children. Child
groups allow embedding different groups in the same root group
and simplify definition of dependent stats (as qualifiers don't
have to be repeated).
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
include/net/hstats.h | 6 ++++++
net/core/hstats.c | 14 ++++++++++++++
2 files changed, 20 insertions(+)
diff --git a/include/net/hstats.h b/include/net/hstats.h
index c2e8b379237a..cbbdaf93d408 100644
--- a/include/net/hstats.h
+++ b/include/net/hstats.h
@@ -37,18 +37,24 @@ struct rtnl_hstat_qualifier {
/**
* struct rtnl_hstat_group - node in the hstat hierarchy
* @qualifiers: attributes describing this group
+ * @has_children: @children array is present and NULL-terminated
* @stats_cnt: number of stats in the bitmask
* @stats: bitmask of stats present
* @get_stats: driver callback for dumping the stats
+ * @children: NULL-terminated array of groups inheriting the qualifiers
+ * @has_children has to be set for core to parse the array
*/
struct rtnl_hstat_group {
/* Note: this is *not* indexed with IFLA_* attributes! */
struct rtnl_hstat_qualifier qualifiers[RTNL_HSTATS_QUAL_CNT];
+ bool has_children;
/* Can't use bitmaps - words are variable length */
unsigned int stats_cnt;
u64 stats[DIV_ROUND_UP(IFLA_HSTATS_STAT_MAX + 1, 64)];
int (*get_stats)(struct net_device *dev, struct rtnl_hstat_req *req,
const struct rtnl_hstat_group *grp);
+
+ const struct rtnl_hstat_group *children[];
};
void rtnl_hstat_add_grp(struct rtnl_hstat_req *req,
diff --git a/net/core/hstats.c b/net/core/hstats.c
index 183a1c5dd93a..b409dd40e0c9 100644
--- a/net/core/hstats.c
+++ b/net/core/hstats.c
@@ -31,6 +31,10 @@ enum hstat_dumper_cmd {
*/
HSTAT_DCMD_GRP_LOAD,
/* dump all statitics
+ * ---------------
+ * | LOAD child0 |
+ * | LOAD child1 |
+ * ===============
*/
HSTAT_DCMD_GRP_DUMP,
/* close grp */
@@ -353,6 +357,16 @@ static int hstat_dumper_grp_dump(struct hstat_dumper *dumper)
if (err)
return err;
+ if (cmd.grp->has_children) {
+ const struct rtnl_hstat_group *const *grp;
+
+ for (grp = cmd.grp->children; *grp; grp++) {
+ err = hstat_dumper_push_grp_load(dumper, *grp);
+ if (err)
+ return err;
+ }
+ }
+
return 0;
}
--
2.19.2
^ permalink raw reply related
* [RFC 03/14] net: hstats: add basic/core functionality
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
In-Reply-To: <20190128234507.32028-1-jakub.kicinski@netronome.com>
Add basic hierarchical stats. For now there are no hierarchies
or other fancy features. An ndo is added, and drivers can return
multiple groups of stats by adding them to the dump with
rtnl_hstat_add_grp().
Each group has attributes (qualifiers) which designate the direction
of the statistic (TX vs RX) and the source (device vs driver).
A handful of common statistics maintained by Linux drivers is added.
Dumping machinery is a little involved to make extensions easy.
A simple stack-based machine is employed which will in due course
help keep track of children and iteration over classifiers.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
include/linux/netdevice.h | 9 +
include/net/hstats.h | 94 +++++++
include/uapi/linux/if_link.h | 45 ++++
net/core/Makefile | 2 +-
net/core/hstats.c | 497 +++++++++++++++++++++++++++++++++++
net/core/rtnetlink.c | 21 ++
6 files changed, 667 insertions(+), 1 deletion(-)
create mode 100644 include/net/hstats.h
create mode 100644 net/core/hstats.c
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e675ef97a426..9f16036312f9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -941,6 +941,8 @@ struct dev_ifalias {
char ifalias[];
};
+struct rtnl_hstat_req;
+
/*
* This structure defines the management hooks for network devices.
* The following hooks can be defined; unless noted otherwise, they are
@@ -1245,6 +1247,11 @@ struct dev_ifalias {
* that got dropped are freed/returned via xdp_return_frame().
* Returns negative number, means general error invoking ndo, meaning
* no frames were xmit'ed and core-caller will free all frames.
+ * int (*ndo_hstat_get_groups)(const struct net_device *dev,
+ * struct rtnl_hstat_req *req);
+ * This function is used to retrieve driver's groups of hierarchical stats.
+ * Driver should use rtnl_hstat_add_grp() to report its groups.
+ * See Documentation/networking/hstats.rst for details.
*/
struct net_device_ops {
int (*ndo_init)(struct net_device *dev);
@@ -1441,6 +1448,8 @@ struct net_device_ops {
u32 flags);
int (*ndo_xsk_async_xmit)(struct net_device *dev,
u32 queue_id);
+ int (*ndo_hstat_get_groups)(const struct net_device *dev,
+ struct rtnl_hstat_req *req);
};
/**
diff --git a/include/net/hstats.h b/include/net/hstats.h
new file mode 100644
index 000000000000..c2e8b379237a
--- /dev/null
+++ b/include/net/hstats.h
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#ifndef _NET_HSTATS_H
+#define _NET_HSTATS_H
+
+#include <linux/if_link.h>
+#include <linux/kernel.h>
+#include <net/netlink.h>
+
+struct net_device;
+struct sk_buff;
+
+/* Internal driver/core qualifiers used as indexes in qualifier tables
+ * and translated into IFLA_HSTATS_QUAL_* in dumps.
+ */
+enum {
+ RTNL_HSTATS_QUAL_TYPE,
+ RTNL_HSTATS_QUAL_DIRECTION,
+
+ RTNL_HSTATS_QUAL_CNT
+};
+
+struct hstat_dumper;
+struct rtnl_hstat_group;
+
+struct rtnl_hstat_req {
+ int err;
+ struct sk_buff *skb;
+ struct hstat_dumper *dumper;
+};
+
+struct rtnl_hstat_qualifier {
+ unsigned int constant;
+};
+
+/**
+ * struct rtnl_hstat_group - node in the hstat hierarchy
+ * @qualifiers: attributes describing this group
+ * @stats_cnt: number of stats in the bitmask
+ * @stats: bitmask of stats present
+ * @get_stats: driver callback for dumping the stats
+ */
+struct rtnl_hstat_group {
+ /* Note: this is *not* indexed with IFLA_* attributes! */
+ struct rtnl_hstat_qualifier qualifiers[RTNL_HSTATS_QUAL_CNT];
+ /* Can't use bitmaps - words are variable length */
+ unsigned int stats_cnt;
+ u64 stats[DIV_ROUND_UP(IFLA_HSTATS_STAT_MAX + 1, 64)];
+ int (*get_stats)(struct net_device *dev, struct rtnl_hstat_req *req,
+ const struct rtnl_hstat_group *grp);
+};
+
+void rtnl_hstat_add_grp(struct rtnl_hstat_req *req,
+ const struct rtnl_hstat_group *grp);
+
+static inline void
+rtnl_hstat_dump(struct rtnl_hstat_req *req, const int id, const u64 val)
+{
+ if (req->err)
+ return;
+ if (nla_put_u64_64bit(req->skb, id, val, IFLA_HSTATS_STAT_UNSPEC))
+ req->err = -EMSGSIZE;
+}
+
+size_t rtnl_get_link_hstats_size(const struct net_device *dev);
+size_t rtnl_get_link_hstats(struct sk_buff *skb, struct net_device *dev,
+ int *prividx);
+
+enum {
+#define RTNL_HSTAT_BIT(_name, _word) \
+ RTNL_HSTATS_STAT_ ## _name ## _BIT = \
+ BIT_ULL(IFLA_HSTATS_STAT_ ## _name - 1 - ((_word) * 64))
+
+ /* Common Linux stats */
+ RTNL_HSTAT_BIT(LINUX_PKTS, 0),
+ RTNL_HSTAT_BIT(LINUX_BYTES, 0),
+ RTNL_HSTAT_BIT(LINUX_BUSY, 0),
+ RTNL_HSTAT_BIT(LINUX_CSUM_PARTIAL, 0),
+ RTNL_HSTAT_BIT(LINUX_CSUM_COMPLETE, 0),
+ RTNL_HSTAT_BIT(LINUX_CSUM_UNNECESSARY, 0),
+ RTNL_HSTAT_BIT(LINUX_SEGMENTATION_OFFLOAD_PKTS, 0),
+#undef RTNL_HSTAT_BIT
+};
+
+/* Helper defines for common qualifier sets */
+#define RTNL_HSTATS_QUALS_BASIC(type, dir) \
+ [RTNL_HSTATS_QUAL_TYPE] = { \
+ .constant = IFLA_HSTATS_QUAL_TYPE_ ##type, \
+ }, \
+ [RTNL_HSTATS_QUAL_DIRECTION] = { \
+ .constant = IFLA_HSTATS_QUAL_DIR_ ##dir, \
+ }
+#endif
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 5b225ff63b48..55fcef81e142 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -910,6 +910,7 @@ enum {
IFLA_STATS_LINK_XSTATS_SLAVE,
IFLA_STATS_LINK_OFFLOAD_XSTATS,
IFLA_STATS_AF_SPEC,
+ IFLA_STATS_LINK_HSTATS,
__IFLA_STATS_MAX,
};
@@ -938,6 +939,50 @@ enum {
};
#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1)
+/* These are embedded into IFLA_STATS_LINK_HSTATS:
+ * See Documentation/networking/hstats.rst for details.
+ */
+enum {
+ IFLA_HSTATS_UNSPEC,
+ IFLA_HSTATS_GROUP,
+ IFLA_HSTATS_STATS,
+ IFLA_HSTATS_QUAL_TYPE,
+ IFLA_HSTATS_QUAL_DIRECTION,
+ __IFLA_HSTATS_MAX,
+};
+#define IFLA_HSTATS_MAX (__IFLA_HSTATS_MAX - 1)
+
+enum {
+ IFLA_HSTATS_QUAL_TYPE_UNSPEC,
+ IFLA_HSTATS_QUAL_TYPE_DEV,
+ IFLA_HSTATS_QUAL_TYPE_DRV,
+ __IFLA_HSTATS_QUAL_TYPE_MAX,
+};
+#define IFLA_HSTATS_QUAL_TYPE_MAX (__IFLA_HSTATS_QUAL_TYPE_MAX - 1)
+
+enum {
+ IFLA_HSTATS_QUAL_DIR_UNSPEC,
+ IFLA_HSTATS_QUAL_DIR_RX,
+ IFLA_HSTATS_QUAL_DIR_TX,
+ __IFLA_HSTATS_QUAL_DIR_MAX,
+};
+#define IFLA_HSTATS_QUAL_DIR_MAX (__IFLA_HSTATS_QUAL_DIR_MAX - 1)
+
+enum {
+ IFLA_HSTATS_STAT_UNSPEC,
+ /* Common statistics */
+ IFLA_HSTATS_STAT_LINUX_PKTS, /* 0 */
+ IFLA_HSTATS_STAT_LINUX_BYTES,
+ IFLA_HSTATS_STAT_LINUX_BUSY,
+ IFLA_HSTATS_STAT_LINUX_CSUM_PARTIAL,
+ IFLA_HSTATS_STAT_LINUX_CSUM_COMPLETE,
+ IFLA_HSTATS_STAT_LINUX_CSUM_UNNECESSARY,
+ IFLA_HSTATS_STAT_LINUX_SEGMENTATION_OFFLOAD_PKTS,
+
+ __IFLA_HSTATS_STAT_MAX,
+};
+#define IFLA_HSTATS_STAT_MAX (__IFLA_HSTATS_STAT_MAX - 1)
+
/* XDP section */
#define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0)
diff --git a/net/core/Makefile b/net/core/Makefile
index fccd31e0e7f7..30635dfbbe9b 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o xdp.o
+ fib_notifier.o xdp.o hstats.o
obj-y += net-sysfs.o
obj-$(CONFIG_PAGE_POOL) += page_pool.o
diff --git a/net/core/hstats.c b/net/core/hstats.c
new file mode 100644
index 000000000000..183a1c5dd93a
--- /dev/null
+++ b/net/core/hstats.c
@@ -0,0 +1,497 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
+
+#include <linux/bitmap.h>
+#include <linux/err.h>
+#include <linux/if_link.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <net/hstats.h>
+#include <net/netlink.h>
+
+/* We deploy a simple stack-based dumper to walk the hierarchies.
+ * This is the documentation format for quick analysis of the state machine:
+ *
+ * Header (in case there is more than one possibility):
+ *
+ * o | direct action 1 \ __ these are performed by the code
+ * r | direct action 2 /
+ * d | --------------
+ * e | | STACK CMD 1 | \ __ these are popped from the stack and run
+ * r v | STACK CMD 2 | / in order after current handler completes
+ * ============== <---- top of the stack before current handler
+ */
+enum hstat_dumper_cmd {
+ /* open grp
+ * put const quals
+ * ---------------
+ * | DUMP STATS |
+ * | CLOSE grp |
+ * ===============
+ */
+ HSTAT_DCMD_GRP_LOAD,
+ /* dump all statitics
+ */
+ HSTAT_DCMD_GRP_DUMP,
+ /* close grp */
+ HSTAT_DCMD_GRP_CLOSE,
+ /* count root group (netlink restart index) */
+ HSTAT_DCMD_ROOT_GRP_DONE,
+};
+
+struct hstat_dumper {
+ struct sk_buff *skb;
+ struct net_device *dev;
+ /* For sizing we only have a const pointer to dev */
+ const struct net_device *const_dev;
+ int err;
+
+ /* For calculating skb size */
+ bool sizing;
+ size_t size;
+
+ int current_root_grp;
+ int last_completed_root_grp;
+
+ u8 *cmd_stack;
+ size_t cmd_stack_top;
+ size_t cmd_stack_len;
+};
+
+struct hstat_dumper_cmd_simple {
+ u64 cmd;
+};
+
+struct hstat_dumper_cmd_grp_load {
+ const struct rtnl_hstat_group *grp;
+ u64 cmd;
+};
+
+struct hstat_dumper_cmd_grp_dump {
+ const struct rtnl_hstat_group *grp;
+ u64 cmd;
+};
+
+struct hstat_dumper_cmd_grp_close {
+ struct nlattr *nl_attr;
+ u64 cmd;
+};
+
+/* RTNL helpers */
+static const int rtnl_qual2ifla[RTNL_HSTATS_QUAL_CNT] = {
+ [RTNL_HSTATS_QUAL_TYPE] = IFLA_HSTATS_QUAL_TYPE,
+ [RTNL_HSTATS_QUAL_DIRECTION] = IFLA_HSTATS_QUAL_DIRECTION,
+};
+
+static bool rtnl_hstat_qualifier_present(const struct rtnl_hstat_qualifier *q)
+{
+ return q->constant;
+}
+
+/* Dumper basics */
+static u64 hstat_dumper_peek_cmd(struct hstat_dumper *dumper)
+{
+ return *(u64 *)(dumper->cmd_stack + dumper->cmd_stack_top - 8);
+}
+
+static int hstat_dumper_discard(struct hstat_dumper *dumper, size_t len)
+{
+ if (WARN_ON_ONCE(dumper->cmd_stack_top < len))
+ return -EINVAL;
+ dumper->cmd_stack_top -= len;
+ return 0;
+}
+
+static int hstat_dumper_pop(struct hstat_dumper *dumper, void *dst, size_t len)
+{
+ if (WARN_ON_ONCE(dumper->cmd_stack_top < len))
+ return -EINVAL;
+ dumper->cmd_stack_top -= len;
+ memcpy(dst, dumper->cmd_stack + dumper->cmd_stack_top, len);
+ return 0;
+}
+
+static bool hstat_dumper_done(struct hstat_dumper *dumper)
+{
+ return !dumper->cmd_stack_top;
+}
+
+static int hstat_dumper_error(struct hstat_dumper *dumper)
+{
+ if (WARN_ON_ONCE(dumper->cmd_stack_top && dumper->cmd_stack_top < 8))
+ return -EINVAL;
+ return 0;
+}
+
+static struct hstat_dumper *
+hstat_dumper_init(struct sk_buff *skb, const struct net_device *const_dev,
+ struct net_device *dev, int *prividx)
+{
+ struct hstat_dumper *dumper;
+
+ dumper = kzalloc(sizeof(*dumper), GFP_KERNEL);
+ if (!dumper)
+ return NULL;
+ dumper->cmd_stack = kmalloc(8096, GFP_KERNEL);
+ if (!dumper->cmd_stack) {
+ kfree(dumper);
+ return NULL;
+ }
+ dumper->cmd_stack_len = 8096;
+
+ dumper->skb = skb;
+ dumper->dev = dev;
+ dumper->const_dev = const_dev;
+ if (prividx)
+ dumper->last_completed_root_grp = *prividx;
+ else
+ dumper->sizing = true;
+
+ return dumper;
+}
+
+static void hstat_dumper_destroy(struct hstat_dumper *dumper)
+{
+ kfree(dumper->cmd_stack);
+ kfree(dumper);
+}
+
+/* Dumper pushers */
+static int
+__hstat_dumper_push_cmd(struct hstat_dumper *dumper, void *data, size_t len)
+{
+ /* All structures pushed must be multiple of 8 w/ cmd as last member */
+ if (WARN_ON_ONCE(len % 8))
+ return -EINVAL;
+
+ while (dumper->cmd_stack_len - dumper->cmd_stack_top < len) {
+ void *st;
+
+ st = krealloc(dumper->cmd_stack, dumper->cmd_stack_len * 2,
+ GFP_KERNEL);
+ if (!st)
+ return -ENOMEM;
+
+ dumper->cmd_stack = st;
+ dumper->cmd_stack_len *= 2;
+ }
+
+ memcpy(dumper->cmd_stack + dumper->cmd_stack_top, data, len);
+ dumper->cmd_stack_top += len;
+ return 0;
+}
+
+static int
+hstat_dumper_push_grp_load(struct hstat_dumper *dumper,
+ const struct rtnl_hstat_group *grp)
+{
+ struct hstat_dumper_cmd_grp_load cmd = {
+ .cmd = HSTAT_DCMD_GRP_LOAD,
+ .grp = grp,
+ };
+
+ return __hstat_dumper_push_cmd(dumper, &cmd, sizeof(cmd));
+}
+
+static int
+hstat_dumper_push_dump(struct hstat_dumper *dumper,
+ const struct rtnl_hstat_group *grp)
+{
+ struct hstat_dumper_cmd_grp_dump cmd = {
+ .cmd = HSTAT_DCMD_GRP_DUMP,
+ .grp = grp,
+ };
+
+ return __hstat_dumper_push_cmd(dumper, &cmd, sizeof(cmd));
+}
+
+static int
+hstat_dumper_push_grp_close(struct hstat_dumper *dumper, struct nlattr *nl_grp)
+{
+ struct hstat_dumper_cmd_grp_close cmd = {
+ .cmd = HSTAT_DCMD_GRP_CLOSE,
+ .nl_attr = nl_grp,
+ };
+
+ return __hstat_dumper_push_cmd(dumper, &cmd, sizeof(cmd));
+}
+
+static int
+hstat_dumper_push_root_grp_done(struct hstat_dumper *dumper)
+{
+ struct hstat_dumper_cmd_simple cmd = { HSTAT_DCMD_ROOT_GRP_DONE };
+
+ return __hstat_dumper_push_cmd(dumper, &cmd, sizeof(cmd));
+}
+
+/* Dumper actions */
+static int hstat_dumper_open_grp(struct hstat_dumper *dumper)
+{
+ struct nlattr *nl_grp;
+ int err;
+
+ if (dumper->sizing) {
+ dumper->size += nla_total_size(0); /* IFLA_HSTATS_GROUP */
+ return 0;
+ }
+
+ /* Open group nlattr and push onto the stack a close command */
+ nl_grp = nla_nest_start(dumper->skb, IFLA_HSTATS_GROUP);
+ if (!nl_grp)
+ return -EMSGSIZE;
+
+ err = hstat_dumper_push_grp_close(dumper, nl_grp);
+ if (err) {
+ nla_nest_cancel(dumper->skb, nl_grp);
+ return err;
+ }
+
+ return 0;
+}
+
+static int
+hstat_dumper_grp_put_stats(struct hstat_dumper *dumper,
+ const struct rtnl_hstat_group *grp)
+{
+ struct rtnl_hstat_req dump_req;
+ struct nlattr *nl_stats;
+ int err;
+
+ WARN_ON_ONCE(!grp->stats_cnt != !grp->get_stats);
+
+ if (!grp->stats_cnt)
+ return 0;
+
+ if (dumper->sizing) {
+ dumper->size += nla_total_size(0);
+ dumper->size += grp->stats_cnt * nla_total_size(8);
+ return 0;
+ }
+
+ nl_stats = nla_nest_start(dumper->skb, IFLA_HSTATS_STATS);
+ if (!nl_stats)
+ return -EMSGSIZE;
+
+ memset(&dump_req, 0, sizeof(dump_req));
+ dump_req.dumper = dumper;
+ dump_req.skb = dumper->skb;
+
+ err = grp->get_stats(dumper->dev, &dump_req, grp);
+ if (err)
+ goto err_cancel_stats;
+ err = dump_req.err;
+ if (err)
+ goto err_cancel_stats;
+
+ nla_nest_end(dumper->skb, nl_stats);
+ return 0;
+
+err_cancel_stats:
+ nla_nest_cancel(dumper->skb, nl_stats);
+ return err;
+}
+
+static int
+hstat_dumper_put_qual(struct hstat_dumper *dumper, int i, u32 val)
+{
+ if (dumper->sizing) {
+ dumper->size += nla_total_size(sizeof(u32));
+ return 0;
+ }
+
+ return nla_put_u32(dumper->skb, rtnl_qual2ifla[i], val);
+}
+
+/* Dumper handlers */
+static int hstat_dumper_grp_load(struct hstat_dumper *dumper)
+{
+ struct hstat_dumper_cmd_grp_load cmd;
+ int i, err;
+
+ err = hstat_dumper_pop(dumper, &cmd, sizeof(cmd));
+ if (err)
+ return err;
+ if (dumper->err)
+ return 0;
+
+ if (dumper->current_root_grp < dumper->last_completed_root_grp)
+ return 0;
+
+ err = hstat_dumper_open_grp(dumper);
+ if (err)
+ return err;
+
+ for (i = 0; i < RTNL_HSTATS_QUAL_CNT; i++) {
+ const struct rtnl_hstat_qualifier *q;
+
+ q = &cmd.grp->qualifiers[i];
+ if (!rtnl_hstat_qualifier_present(q))
+ continue;
+
+ if (q->constant) {
+ err = hstat_dumper_put_qual(dumper, i, q->constant);
+ if (err)
+ return err;
+ }
+ }
+
+ return hstat_dumper_push_dump(dumper, cmd.grp);
+}
+
+static int hstat_dumper_grp_dump(struct hstat_dumper *dumper)
+{
+ struct hstat_dumper_cmd_grp_dump cmd;
+ int err;
+
+ err = hstat_dumper_pop(dumper, &cmd, sizeof(cmd));
+ if (err)
+ return err;
+ if (dumper->err)
+ return 0;
+
+ err = hstat_dumper_grp_put_stats(dumper, cmd.grp);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int hstat_dumper_grp_close(struct hstat_dumper *dumper)
+{
+ struct hstat_dumper_cmd_grp_close cmd;
+ int err;
+
+ err = hstat_dumper_pop(dumper, &cmd, sizeof(cmd));
+ if (err)
+ return err;
+
+ if (!dumper->err)
+ nla_nest_end(dumper->skb, cmd.nl_attr);
+ else
+ nla_nest_cancel(dumper->skb, cmd.nl_attr);
+ return 0;
+}
+
+static int hstat_dumper_root_grp_done(struct hstat_dumper *dumper)
+{
+ int err;
+
+ err = hstat_dumper_discard(dumper, sizeof(u64));
+ if (err)
+ return err;
+ if (dumper->err)
+ return 0;
+
+ dumper->current_root_grp++;
+ return 0;
+}
+
+/* Run the dumper until its command stack is empty.
+ *
+ * Every iteration peeks at the command word on top of the stack and invokes
+ * the matching handler, which pops its own command. The first handler error
+ * is latched in dumper->err and the loop keeps going, so that the remaining
+ * handlers can pop their commands and only do clean up.
+ *
+ * Returns 0 on success, the first latched handler error once the stack has
+ * drained, or -EINVAL if the stack is corrupted or holds an unknown command.
+ */
+static int hstat_dumper_run(struct hstat_dumper *dumper)
+{
+	while (!hstat_dumper_done(dumper)) {
+		int err;
+		u64 cmd;
+
+		err = hstat_dumper_error(dumper);
+		if (err)
+			return err;
+
+		cmd = hstat_dumper_peek_cmd(dumper);
+		switch (cmd) {
+		case HSTAT_DCMD_ROOT_GRP_DONE:
+			err = hstat_dumper_root_grp_done(dumper);
+			break;
+		case HSTAT_DCMD_GRP_LOAD:
+			err = hstat_dumper_grp_load(dumper);
+			break;
+		case HSTAT_DCMD_GRP_CLOSE:
+			err = hstat_dumper_grp_close(dumper);
+			break;
+		case HSTAT_DCMD_GRP_DUMP:
+			err = hstat_dumper_grp_dump(dumper);
+			break;
+		default:
+			/* No handler would pop an unknown command, bail out
+			 * instead of spinning on it forever.
+			 */
+			WARN_ON_ONCE(1);
+			return -EINVAL;
+		}
+		if (err && !dumper->err)
+			/* Record the error and keep invoking handlers,
+			 * handlers will see the error and only do clean up.
+			 */
+			dumper->err = err;
+	}
+
+	/* Report the latched error rather than 0, otherwise the caller
+	 * cannot tell a complete dump from one that was cut short.
+	 */
+	return dumper->err;
+}
+
+/* Driver helpers */
+void
+rtnl_hstat_add_grp(struct rtnl_hstat_req *req,
+ const struct rtnl_hstat_group *grp)
+{
+ if (!req->err)
+ req->err = hstat_dumper_push_root_grp_done(req->dumper);
+ if (!req->err)
+ req->err = hstat_dumper_push_grp_load(req->dumper, grp);
+}
+EXPORT_SYMBOL(rtnl_hstat_add_grp);
+
+/* Stack call points */
+/* Common implementation of hstats sizing and dumping.
+ * @skb:	message being filled, NULL when sizing
+ * @const_dev:	device to query, valid for both sizing and dumping
+ * @dev:	the same device as a non-const pointer, NULL when sizing
+ * @prividx:	netlink restart index, NULL when sizing; read as the last
+ *		completed root group and written back before returning
+ *
+ * Asks the driver for its root groups and runs the dumper over them.
+ * Returns a negative errno (-ENODATA if the device reports no hstats),
+ * otherwise the accumulated attribute size when sizing, or the dumper
+ * result when dumping.
+ */
+static ssize_t
+__rtnl_get_link_hstats(struct sk_buff *skb, const struct net_device *const_dev,
+		       struct net_device *dev, int *prividx)
+{
+	/* Only const_dev is valid on the sizing path, dev is NULL there */
+	const struct net_device_ops *ops = const_dev->netdev_ops;
+	struct hstat_dumper *dumper;
+	struct rtnl_hstat_req req;
+	ssize_t ret;
+
+	if (!ops || !ops->ndo_hstat_get_groups)
+		return -ENODATA;
+
+	dumper = hstat_dumper_init(skb, const_dev, dev, prividx);
+	if (!dumper)
+		return -ENOMEM;
+
+	memset(&req, 0, sizeof(req));
+	req.dumper = dumper;
+
+	ret = ops->ndo_hstat_get_groups(const_dev, &req);
+	if (ret < 0)
+		goto exit_dumper_destroy;
+	ret = req.err;
+	if (ret)
+		goto exit_dumper_destroy;
+
+	/* Driver added no groups; leave via the common exit so the dumper
+	 * and its command stack are freed.
+	 */
+	if (hstat_dumper_done(dumper)) {
+		ret = -ENODATA;
+		goto exit_dumper_destroy;
+	}
+
+	ret = hstat_dumper_run(dumper);
+	if (prividx) {
+		if (ret)
+			*prividx = dumper->current_root_grp;
+		else
+			*prividx = 0;
+	} else if (ret >= 0) {
+		ret = dumper->size;
+	}
+
+exit_dumper_destroy:
+	hstat_dumper_destroy(dumper);
+	return ret;
+}
+
+/* Returns the number of bytes the hstats attributes of @dev will need.
+ *
+ * The result is added to the netlink message size by the caller, so any
+ * failure (including -ENODATA for devices without hstats) is reported as 0
+ * instead of a negative errno converted to a huge size_t.
+ */
+size_t rtnl_get_link_hstats_size(const struct net_device *dev)
+{
+	ssize_t ret;
+
+	ret = __rtnl_get_link_hstats(NULL, dev, NULL, NULL);
+	return ret < 0 ? 0 : ret;
+}
+
+size_t
+rtnl_get_link_hstats(struct sk_buff *skb, struct net_device *dev, int *prividx)
+{
+ ssize_t ret;
+
+ ret = __rtnl_get_link_hstats(skb, dev, dev, prividx);
+ return ret;
+}
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index f5a98082ac7a..a8112d0dca57 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -50,6 +50,7 @@
#include <net/ip.h>
#include <net/protocol.h>
#include <net/arp.h>
+#include <net/hstats.h>
#include <net/route.h>
#include <net/udp.h>
#include <net/tcp.h>
@@ -4871,6 +4872,23 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev,
*idxattr = 0;
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_HSTATS, *idxattr)) {
+ *idxattr = IFLA_STATS_LINK_HSTATS;
+ attr = nla_nest_start(skb, IFLA_STATS_LINK_HSTATS);
+ if (!attr)
+ goto nla_put_failure;
+
+ err = rtnl_get_link_hstats(skb, dev, prividx);
+ if (err == -ENODATA)
+ nla_nest_cancel(skb, attr);
+ else
+ nla_nest_end(skb, attr);
+
+ if (err && err != -ENODATA)
+ goto nla_put_failure;
+ *idxattr = 0;
+ }
+
nlmsg_end(skb, nlh);
return 0;
@@ -4946,6 +4964,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev,
rcu_read_unlock();
}
+ if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_HSTATS, 0))
+ size += rtnl_get_link_hstats_size(dev);
+
return size;
}
--
2.19.2
^ permalink raw reply related
* [RFC 02/14] nfp: constify parameter to nfp_port_from_netdev()
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
In-Reply-To: <20190128234507.32028-1-jakub.kicinski@netronome.com>
Make nfp_port_from_netdev() take a const parameter, otherwise
it can't be used with upcoming stats code.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 +-
drivers/net/ethernet/netronome/nfp/nfp_net_repr.h | 2 +-
drivers/net/ethernet/netronome/nfp/nfp_port.c | 2 +-
drivers/net/ethernet/netronome/nfp/nfp_port.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index 320ec3900a32..93de25b39bc1 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -839,7 +839,7 @@ extern const char nfp_driver_version[];
extern const struct net_device_ops nfp_net_netdev_ops;
-static inline bool nfp_netdev_is_nfp_net(struct net_device *netdev)
+static inline bool nfp_netdev_is_nfp_net(const struct net_device *netdev)
{
return netdev->netdev_ops == &nfp_net_netdev_ops;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
index e0f13dfe1f39..7f2df74c2a7f 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.h
@@ -75,7 +75,7 @@ enum nfp_repr_type {
extern const struct net_device_ops nfp_repr_netdev_ops;
-static inline bool nfp_netdev_is_nfp_repr(struct net_device *netdev)
+static inline bool nfp_netdev_is_nfp_repr(const struct net_device *netdev)
{
return netdev->netdev_ops == &nfp_repr_netdev_ops;
}
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.c b/drivers/net/ethernet/netronome/nfp/nfp_port.c
index 86bc149ca231..81a23f9bdfc0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.c
@@ -12,7 +12,7 @@
#include "nfp_net.h"
#include "nfp_port.h"
-struct nfp_port *nfp_port_from_netdev(struct net_device *netdev)
+struct nfp_port *nfp_port_from_netdev(const struct net_device *netdev)
{
if (nfp_netdev_is_nfp_net(netdev)) {
struct nfp_net *nn = netdev_priv(netdev);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_port.h b/drivers/net/ethernet/netronome/nfp/nfp_port.h
index b2479a2a49e5..24de9250d564 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_port.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_port.h
@@ -105,7 +105,7 @@ static inline bool nfp_port_is_vnic(const struct nfp_port *port)
int
nfp_port_set_features(struct net_device *netdev, netdev_features_t features);
-struct nfp_port *nfp_port_from_netdev(struct net_device *netdev);
+struct nfp_port *nfp_port_from_netdev(const struct net_device *netdev);
struct nfp_port *
nfp_port_from_id(struct nfp_pf *pf, enum nfp_port_type type, unsigned int id);
struct nfp_eth_table_port *__nfp_port_get_eth_port(struct nfp_port *port);
--
2.19.2
^ permalink raw reply related
* [RFC 01/14] nfp: remove unused structure
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
In-Reply-To: <20190128234507.32028-1-jakub.kicinski@netronome.com>
Remove struct nfp_stat_pair, it used to be used for TC BPF offload,
but now we only offload direct action mode which doesn't have
explicit stats.
Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
---
drivers/net/ethernet/netronome/nfp/nfp_net.h | 5 -----
1 file changed, 5 deletions(-)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index be37c2d6151c..320ec3900a32 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -447,11 +447,6 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver,
fw_ver->minor == minor;
}
-struct nfp_stat_pair {
- u64 pkts;
- u64 bytes;
-};
-
/**
* struct nfp_net_dp - NFP network device datapath data structure
* @dev: Backpointer to struct device
--
2.19.2
^ permalink raw reply related
* [RFC 00/14] netlink/hierarchical stats
From: Jakub Kicinski @ 2019-01-28 23:44 UTC (permalink / raw)
To: davem
Cc: oss-drivers, netdev, jiri, f.fainelli, andrew, mkubecek, dsahern,
simon.horman, jesse.brandeburg, maciejromanfijalkowski,
vasundhara-v.volam, michael.chan, shalomt, idosch, Jakub Kicinski
Hi!
As I tried to explain in my slides at netconf 2018 we are lacking
an expressive, standard API to report device statistics.
Networking silicon generally maintains some IEEE 802.3 and/or RMON
statistics. Today those all end up in ethtool -S. Here is a simple
attempt (admittedly very imprecise) at counting how many names driver
authors invented for IETF RFC2819 etherStatsPkts512to1023Octets
statistics (RX and TX):
$ git grep '".*512.*1023.*"' -- drivers/net/ | \
sed -e 's/.*"\(.*\)".*/\1/' | sort | uniq | wc -l
63
Interestingly only two drivers in the tree use the name the standard
gave us (etherStatsPkts512to1023, modulo case).
I set out to work on this set in an attempt to give drivers a way
to express clearly to user space standard-compliant counters.
Second most common use for custom statistics is per-queue counters.
This is where the "hierarchical" part of this set comes in, as
groups can be nested, and user space tools can handle the aggregation
inside the groups if needed.
This set also tries to address the problem of users not knowing if
a statistic is reported by hardware or the driver. Many modern drivers
use some prefix in ethtool -S to indicate MAC/PHY stats. At a quick
glance: Netronome uses "mac.", Intel "port." and Mellanox "_phy".
In this set, netlink attributes describe whether a group of statistics
is RX or TX, maintained by device or driver.
The purpose of this patch set is _not_ to replace ethtool -S. It is
an incredibly useful tool, and we will certainly continue using it.
However, for standard-based and commonly maintained statistics a more
structured API seems warranted.
There are two things missing from these patches, which I initially
planned to address as well: filtering, and refresh rate control.
Filtering doesn't need much explanation, users should be able to request
only a subset of statistics (like only SW stats or only given ID). The
bitmap of statistics in each group is there for filtering later on.
By refresh control I mean the ability for user space to indicate how
"fresh" values it expects. Sometimes reading the HW counters requires
slow register reads or FW communication, in such cases drivers may cache
the result. (Privileged) user space should be able to add a "not older
than" timestamp to indicate how fresh statistics it expects. And vice
versa, drivers can then also put the timestamp of when the statistics
were last refreshed in the dump for more precise bandwidth estimation.
Jakub Kicinski (14):
nfp: remove unused structure
nfp: constify parameter to nfp_port_from_netdev()
net: hstats: add basic/core functionality
net: hstats: allow hierarchies to be built
nfp: very basic hstat support
net: hstats: allow iterators
net: hstats: help in iteration over directions
nfp: hstats: make use of iteration for direction
nfp: hstats: add driver and device per queue statistics
net: hstats: add IEEE 802.3 and common IETF MIB/RMON stats
nfp: hstats: add IEEE/RMON ethernet port/MAC stats
net: hstats: add markers for partial groups
nfp: hstats: add a partial group of per-8021Q prio stats
Documentation: networking: describe new hstat API
Documentation/networking/hstats.rst | 590 +++++++++++++++
.../networking/hstats_flow_example.dot | 11 +
Documentation/networking/index.rst | 1 +
drivers/net/ethernet/netronome/nfp/Makefile | 1 +
.../net/ethernet/netronome/nfp/nfp_hstat.c | 474 ++++++++++++
drivers/net/ethernet/netronome/nfp/nfp_main.c | 1 +
drivers/net/ethernet/netronome/nfp/nfp_main.h | 2 +
drivers/net/ethernet/netronome/nfp/nfp_net.h | 10 +-
.../ethernet/netronome/nfp/nfp_net_common.c | 1 +
.../net/ethernet/netronome/nfp/nfp_net_repr.h | 2 +-
drivers/net/ethernet/netronome/nfp/nfp_port.c | 2 +-
drivers/net/ethernet/netronome/nfp/nfp_port.h | 2 +-
include/linux/netdevice.h | 9 +
include/net/hstats.h | 176 +++++
include/uapi/linux/if_link.h | 107 +++
net/core/Makefile | 2 +-
net/core/hstats.c | 682 ++++++++++++++++++
net/core/rtnetlink.c | 21 +
18 files changed, 2084 insertions(+), 10 deletions(-)
create mode 100644 Documentation/networking/hstats.rst
create mode 100644 Documentation/networking/hstats_flow_example.dot
create mode 100644 drivers/net/ethernet/netronome/nfp/nfp_hstat.c
create mode 100644 include/net/hstats.h
create mode 100644 net/core/hstats.c
--
2.19.2
^ permalink raw reply
* [PATCH net] net: ti: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-01-28 23:40 UTC (permalink / raw)
To: f.fainelli; +Cc: davem, netdev, linux-kernel, xue.zhihong, wang.yi59, Yang Wei
dev_consume_skb_irq() should be called in cpmac_end_xmit() when
xmit done. It makes drop profiles more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/ti/cpmac.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index 810dfc7..e2d47b2 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -608,7 +608,7 @@ static void cpmac_end_xmit(struct net_device *dev, int queue)
netdev_dbg(dev, "sent 0x%p, len=%d\n",
desc->skb, desc->skb->len);
- dev_kfree_skb_irq(desc->skb);
+ dev_consume_skb_irq(desc->skb);
desc->skb = NULL;
if (__netif_subqueue_stopped(dev, queue))
netif_wake_subqueue(dev, queue);
--
2.7.4
^ permalink raw reply related
* [PATCH net] net: apple: replace dev_kfree_skb_irq by dev_consume_skb_irq for drop profiles
From: Yang Wei @ 2019-01-28 23:39 UTC (permalink / raw)
To: davem
Cc: krzk, herbert, yuehaibing, netdev, linux-kernel, xue.zhihong,
wang.yi59, Yang Wei
dev_consume_skb_irq() should be called in bmac_txdma_intr() when
xmit done. It makes drop profiles more friendly.
Signed-off-by: Yang Wei <yang.wei9@zte.com.cn>
---
drivers/net/ethernet/apple/bmac.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index 6a8e256..4d3855c 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -777,7 +777,7 @@ static irqreturn_t bmac_txdma_intr(int irq, void *dev_id)
if (bp->tx_bufs[bp->tx_empty]) {
++dev->stats.tx_packets;
- dev_kfree_skb_irq(bp->tx_bufs[bp->tx_empty]);
+ dev_consume_skb_irq(bp->tx_bufs[bp->tx_empty]);
}
bp->tx_bufs[bp->tx_empty] = NULL;
bp->tx_fullup = 0;
--
2.7.4
^ permalink raw reply related
* Re: [PATCH bpf-next] tools: bpftool: warn about risky prog array updates
From: Daniel Borkmann @ 2019-01-28 23:35 UTC (permalink / raw)
To: Jakub Kicinski, alexei.starovoitov; +Cc: oss-drivers, netdev
In-Reply-To: <20190128182915.434-1-jakub.kicinski@netronome.com>
On 01/28/2019 07:29 PM, Jakub Kicinski wrote:
> When prog array is updated with bpftool users often refer
> to the map via the ID. Unfortunately, that's likely
> to lead to confusion because prog arrays get flushed when
> the last user reference is gone. If there is no other
> reference bpftool will create one, update successfully
> just to close the map again and have it flushed.
>
> Warn about this case in non-JSON mode.
>
> If the problem continues causing confusion we can remove
> the support for referring to a map by ID for prog array
> update completely. For now it seems like the potential
> inconvenience to users who know what they're doing outweighs
> the benefit.
>
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Applied, thanks!
^ permalink raw reply
* Re: [PATCH -next] selftests: bpf: remove duplicated include
From: Daniel Borkmann @ 2019-01-28 23:25 UTC (permalink / raw)
To: YueHaibing, ast, shuah; +Cc: linux-kernel, linux-kselftest, netdev
In-Reply-To: <20190125024634.14580-1-yuehaibing@huawei.com>
On 01/25/2019 03:46 AM, YueHaibing wrote:
> Remove duplicated include.
>
> Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Applied, thanks!
^ permalink raw reply
* Re: [PATCH net] sk_msg: Always cancel strp work before freeing the psock
From: Daniel Borkmann @ 2019-01-28 23:24 UTC (permalink / raw)
To: Jakub Sitnicki, netdev; +Cc: John Fastabend, Marek Majkowski
In-Reply-To: <20190128091335.20908-1-jakub@cloudflare.com>
On 01/28/2019 10:13 AM, Jakub Sitnicki wrote:
> Despite having stopped the parser, we still need to deinitialize it by
> calling strp_done so that it cancels its work. Otherwise the worker
> thread can run after we have freed the parser, and attempt to access its
> workqueue resulting in a use-after-free:
>
[...]
> Reported-by: Marek Majkowski <marek@cloudflare.com>
> Link: https://lore.kernel.org/netdev/CAJPywTLwgXNEZ2dZVoa=udiZmtrWJ0q5SuBW64aYs0Y1khXX3A@mail.gmail.com
> Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Applied, thanks!
^ permalink raw reply
* Re: [PATCH bpf] tools: bpftool: fix crash with un-owned prog arrays
From: Daniel Borkmann @ 2019-01-28 23:23 UTC (permalink / raw)
To: Jakub Kicinski, alexei.starovoitov; +Cc: oss-drivers, netdev
In-Reply-To: <20190128180121.31362-1-jakub.kicinski@netronome.com>
On 01/28/2019 07:01 PM, Jakub Kicinski wrote:
> Prog arrays don't have 'owner_prog_type' and 'owner_jited'
> fields in their fdinfo when they are created. Those fields
> are set and reported when first program is checked for
> compatibility by bpf_prog_array_compatible().
>
> This means that bpftool cannot expect the fields to always
> be there. Currently trying to show maps on a system with
> an un-owned prog array leads to a crash:
>
> $ bpftool map show
> 389: prog_array name tail_call_map flags 0x0
> Error: key 'owner_prog_type' not found in fdinfo
> Error: key 'owner_jited' not found in fdinfo
> key 4B value 4B max_entries 4 memlock 4096B
> Segmentation fault (core dumped)
>
> We pass a NULL pointer to atoi().
>
> Remove the assumption that fdinfo keys are always present.
> Add missing validations and remove the p_err() calls which
> may lead to broken JSON output as caller will not propagate
> the failure.
>
> Fixes: 99a44bef5870 ("tools: bpftool: add owner_prog_type and owner_jited to bpftool output")
> Signed-off-by: Jakub Kicinski <jakub.kicinski@netronome.com>
> Reviewed-by: Quentin Monnet <quentin.monnet@netronome.com>
Applied, thanks!
^ permalink raw reply
* Re: [PATCH] bpf/core.c - silence warning messages
From: Daniel Borkmann @ 2019-01-28 23:22 UTC (permalink / raw)
To: Song Liu, Valdis Kletnieks; +Cc: Alexei Starovoitov, Networking, open list
In-Reply-To: <CAPhsuW54z8ao4KOKG9ZjdXus6LTBH2qwAi8LgPFZ_qb_ObKsRg@mail.gmail.com>
On 01/28/2019 06:18 PM, Song Liu wrote:
> On Sun, Jan 27, 2019 at 8:43 PM <valdis.kletnieks@vt.edu> wrote:
>>
>> Compiling kernel/bpf/core.c with W=1 causes a flood of warnings:
>>
>> kernel/bpf/core.c:1198:65: warning: initialized field overwritten [-Woverride-init]
>> 1198 | #define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
>> | ^~~~
>> kernel/bpf/core.c:1087:2: note: in expansion of macro 'BPF_INSN_3_TBL'
>> 1087 | INSN_3(ALU, ADD, X), \
>> | ^~~~~~
>> kernel/bpf/core.c:1202:3: note: in expansion of macro 'BPF_INSN_MAP'
>> 1202 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
>> | ^~~~~~~~~~~~
>> kernel/bpf/core.c:1198:65: note: (near initialization for 'public_insntable[12]')
>> 1198 | #define BPF_INSN_3_TBL(x, y, z) [BPF_##x | BPF_##y | BPF_##z] = true
>> | ^~~~
>> kernel/bpf/core.c:1087:2: note: in expansion of macro 'BPF_INSN_3_TBL'
>> 1087 | INSN_3(ALU, ADD, X), \
>> | ^~~~~~
>> kernel/bpf/core.c:1202:3: note: in expansion of macro 'BPF_INSN_MAP'
>> 1202 | BPF_INSN_MAP(BPF_INSN_2_TBL, BPF_INSN_3_TBL),
>> | ^~~~~~~~~~~~
>>
>> 98 copies of the above.
>>
>> The attached patch silences the warnings, because we *know* we're overwriting
>> the default initializer. That leaves bpf/core.c with only 6 other warnings,
>> which become more visible in comparison.
>
> My concern is that this will also mute the warning for other parts of
> bpf/core.c.
Agree, valid concern.
> Maybe we should move bpf_opcode_in_insntable() to a separate file, and mute
> warning for that file?
I think moving it into a separate file would be overkill, imho. However, let's get
the kdoc and prototype warning fixed.
Thanks,
Daniel
^ permalink raw reply
* [PATCH bpf] bpf, doc: add reviewers to maintainers entry
From: Daniel Borkmann @ 2019-01-28 22:55 UTC (permalink / raw)
To: ast; +Cc: netdev, kafai, songliubraving, yhs, Daniel Borkmann
In order to better scale BPF development on netdev, we've adopted a
reviewer rotation for all BPF patches among the five of us for some
time now. Let's give credit where credit is due, and add Martin, Song
and Yonghong as official BPF reviewers to MAINTAINERS file. Also
while at it, add regex matching for BPF such that we get properly
Cc'ed for files not listed here.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
MAINTAINERS | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/MAINTAINERS b/MAINTAINERS
index 51029a4..6e13378 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2848,6 +2848,9 @@ F: include/uapi/linux/if_bonding.h
BPF (Safe dynamic programs and tools)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
+R: Martin KaFai Lau <kafai@fb.com>
+R: Song Liu <songliubraving@fb.com>
+R: Yonghong Song <yhs@fb.com>
L: netdev@vger.kernel.org
L: linux-kernel@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
@@ -2873,6 +2876,8 @@ F: samples/bpf/
F: tools/bpf/
F: tools/lib/bpf/
F: tools/testing/selftests/bpf/
+K: bpf
+N: bpf
BPF JIT for ARM
M: Shubham Bansal <illusionist.neo@gmail.com>
--
2.9.5
^ permalink raw reply related
* Re: Kernel memory corruption in CIPSO labeled TCP packets processing.
From: Paul Moore @ 2019-01-28 22:18 UTC (permalink / raw)
To: Nazarov Sergey
Cc: linux-security-module@vger.kernel.org, selinux@vger.kernel.org,
netdev@vger.kernel.org, Casey Schaufler
In-Reply-To: <1125571548681054@iva5-0acfc31d2b43.qloud-c.yandex.net>
On Mon, Jan 28, 2019 at 8:10 AM Nazarov Sergey <s-nazarov@yandex.ru> wrote:
> 25.01.2019, 19:46, "Paul Moore" <paul@paul-moore.com>:
> > Hmm, I think the above calculation should take into account the actual
> > length of the IP options, and not just the max size (calculate it
> > based on iphdr->ihl).
> >
> > Beyond that fix, I think it's time to put together a proper patchset
> > and post it to the lists for formal review/merging.
> >
> > Thanks for your work on this.
> >
> > --
> > paul moore
> > www.paul-moore.com
>
> Where we can take actual IP options length? Sorry, I'm not so familiar with linux network stack.
I'm the one who needs to apologize; you're doing it correctly. Not
sure what I was thinking there, sorry about that.
> And also, ip_options_compile could change IP options data (SSRR, LSRR, RR, TIMESTAMP options),
> so, we can't use ip_options_compile again for these options. Am I right?
If we don't pass a skb into ip_options_compile(), meaning both "skb"
and "rt" will be NULL, then I don't believe the option data will
change. Am I missing something?
--
paul moore
www.paul-moore.com
^ permalink raw reply
* [PATCHv3 0/6] Add SOCFPGA System Manager
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
From: Thor Thayer <thor.thayer@linux.intel.com>
Add MFD driver for SOCFPGA System Manager to handle
System Manager calls differently for ARM32 vs ARM64.
The SOCFPGA System Manager includes registers from several
SOC peripherals.
On ARM32, syscon handles this aggregated register grouping.
Implement System Manager calls as regmap_mmio similar to syscon
for ARM32 SOCFPGA systems.
The ARM64 System Manager can only be accessed from priority
level EL3 so this new MFD driver handles the calls to EL3.
v3 Changes:
Create and register regmap in probe().
Lookup functions find registered regmap.
Cleanup of header file.
Fix copyright dates.
Replace global pointer with traditional probe() methodology.
Thor Thayer (6):
mfd: altera-sysmgr: Add SOCFPGA System Manager
Documentation: dt: socfpga: Add S10 System Manager binding
ARM: socfpga_defconfig: Enable CONFIG_MFD_ALTERA_SYSMGR
arm64: defconfig: Enable CONFIG_MFD_ALTERA_SYSMGR
net: stmmac: socfpga: Use shared System Manager driver
arm64: dts: stratix10: New System Manager compatible
.../bindings/arm/altera/socfpga-system.txt | 12 ++
MAINTAINERS | 6 +
arch/arm/configs/socfpga_defconfig | 1 +
arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +-
arch/arm64/configs/defconfig | 1 +
drivers/mfd/Kconfig | 10 +
drivers/mfd/Makefile | 1 +
drivers/mfd/altera-sysmgr.c | 210 +++++++++++++++++++++
.../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 5 +-
include/linux/mfd/altera-sysmgr.h | 29 +++
10 files changed, 274 insertions(+), 3 deletions(-)
create mode 100644 drivers/mfd/altera-sysmgr.c
create mode 100644 include/linux/mfd/altera-sysmgr.h
--
2.7.4
^ permalink raw reply
* [PATCHv3 2/6] Documentation: dt: socfpga: Add S10 System Manager binding
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
Add the device tree bindings for the Stratix10 System Manager.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2 New compatible string and usage for Stratix10
v3 No change
---
.../devicetree/bindings/arm/altera/socfpga-system.txt | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
index f4d04a067282..82edbaaa3f85 100644
--- a/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
+++ b/Documentation/devicetree/bindings/arm/altera/socfpga-system.txt
@@ -11,3 +11,15 @@ Example:
reg = <0xffd08000 0x1000>;
cpu1-start-addr = <0xffd080c4>;
};
+
+ARM64 - Stratix10
+Required properties:
+- compatible : "altr,sys-mgr-s10"
+- reg : Should contain 1 register range(address and length)
+ for system manager register.
+
+Example:
+ sysmgr@ffd12000 {
+ compatible = "altr,sys-mgr-s10";
+ reg = <0xffd12000 0x228>;
+ };
--
2.7.4
^ permalink raw reply related
* [PATCHv3 1/6] mfd: altera-sysmgr: Add SOCFPGA System Manager
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
The SOCFPGA System Manager register block aggregates different
peripheral functions into one area.
On 32 bit ARM parts, handle in the same way as syscon.
On 64 bit ARM parts, the System Manager can only be accessed by
EL3 secure mode. Since a SMC call to EL3 is required, this new
driver uses regmaps similar to syscon to handle the SMC call.
Since regmaps abstract out the underlying register access, the
changes to drivers accessing the System Manager are minimal.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2 Implement Arnd's changes.
1. Change socfpga_is_s10() to check compatible string.
Add new compatible string for Stratix10 in bindings
and add proper detection method.
2. Replace base cast with resource_size_t member.
3. Change s10_sysmgr_regmap_cfg to altr_sysmgr_regmap_cfg to
be generic.
4. Always use 4 byte width.
5. Initialize the .reg_read and .reg_write in S10 case only.
6. Remove call to syscon in 32bit ARM case and handle both
ARM32 and ARM64 in of_sysmgr_register().
7. Replace IS_ERR_OR_NULL() with IS_ERR().
8. Remove compatible check functions except phandle function.
v3 Implement 2nd set of Arnd's changes.
1. Use probe to register and create the regmap.
2. Remove global pointer and use traditional probe() method
of saving altr_sysmgr in private device data.
3. Lookup function using phandle finds altr_sysmgr and
returns its regmap.
4. Fix copyright dates.
5. Remove socfpga_is_s10() function since only used 1 time.
6. Remove unused function prototypes from header file.
7. Remove the SMC defines from header file and use the
defines from the recently accepted Intel Service Layer
header (stratix10-smc.h).
---
MAINTAINERS | 6 ++
drivers/mfd/Kconfig | 10 ++
drivers/mfd/Makefile | 1 +
drivers/mfd/altera-sysmgr.c | 210 ++++++++++++++++++++++++++++++++++++++
include/linux/mfd/altera-sysmgr.h | 29 ++++++
5 files changed, 256 insertions(+)
create mode 100644 drivers/mfd/altera-sysmgr.c
create mode 100644 include/linux/mfd/altera-sysmgr.h
diff --git a/MAINTAINERS b/MAINTAINERS
index 4d04cebb4a71..0d2ccb710213 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -708,6 +708,12 @@ L: linux-gpio@vger.kernel.org
S: Maintained
F: drivers/gpio/gpio-altera.c
+ALTERA SYSTEM MANAGER DRIVER
+M: Thor Thayer <thor.thayer@linux.intel.com>
+S: Maintained
+F: drivers/mfd/altera-sysmgr.c
+F: include/linux/mfd/altera-sysmgr.h
+
ALTERA SYSTEM RESOURCE DRIVER FOR ARRIA10 DEVKIT
M: Thor Thayer <thor.thayer@linux.intel.com>
S: Maintained
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index f461460a2aeb..8629cf13520e 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -29,6 +29,16 @@ config MFD_ALTERA_A10SR
accessing the external gpio extender (LEDs & buttons) and
power supply alarms (hwmon).
+config MFD_ALTERA_SYSMGR
+ bool "Altera SOCFPGA System Manager"
+ depends on (ARCH_SOCFPGA || ARCH_STRATIX10) && OF
+ select MFD_SYSCON
+ help
+ Select this to get System Manager support for all Altera branded
+ SOCFPGAs. The SOCFPGA System Manager handles all SOCFPGAs by
+ using regmap_mmio accesses for ARM32 parts and SMC calls to
+ EL3 for ARM64 parts.
+
config MFD_ACT8945A
tristate "Active-semi ACT8945A"
select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 12980a4ad460..c649f6efed5f 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -233,6 +233,7 @@ obj-$(CONFIG_INTEL_SOC_PMIC_CHTDC_TI) += intel_soc_pmic_chtdc_ti.o
obj-$(CONFIG_MFD_MT6397) += mt6397-core.o
obj-$(CONFIG_MFD_ALTERA_A10SR) += altera-a10sr.o
+obj-$(CONFIG_MFD_ALTERA_SYSMGR) += altera-sysmgr.o
obj-$(CONFIG_MFD_SUN4I_GPADC) += sun4i-gpadc.o
obj-$(CONFIG_MFD_STM32_LPTIMER) += stm32-lptimer.o
diff --git a/drivers/mfd/altera-sysmgr.c b/drivers/mfd/altera-sysmgr.c
new file mode 100644
index 000000000000..ddc02241e265
--- /dev/null
+++ b/drivers/mfd/altera-sysmgr.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018-2019, Intel Corporation.
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2012 Linaro Ltd.
+ *
+ * Based on syscon driver.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/mfd/altera-sysmgr.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+/**
+ * struct altr_sysmgr - Altera SOCFPGA System Manager
+ * @regmap: the regmap used for System Manager accesses.
+ * @base : the base address for the System Manager
+ */
+struct altr_sysmgr {
+ struct regmap *regmap;
+ resource_size_t *base;
+};
+
+static struct platform_driver altr_sysmgr_driver;
+
+/**
+ * s10_protected_reg_write
+ * Write to a protected SMC register.
+ * @base: Base address of System Manager
+ * @reg: Address offset of register
+ * @val: Value to write
+ * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
+ * INTEL_SIP_SMC_REG_ERROR on error
+ * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
+ */
+static int s10_protected_reg_write(void *base,
+ unsigned int reg, unsigned int val)
+{
+ struct arm_smccc_res result;
+ unsigned long sysmgr_base = (unsigned long)base;
+
+ arm_smccc_smc(INTEL_SIP_SMC_REG_WRITE, sysmgr_base + reg,
+ val, 0, 0, 0, 0, 0, &result);
+
+ return (int)result.a0;
+}
+
+/**
+ * s10_protected_reg_read
+ * Read the status of a protected SMC register
+ * @base: Base address of System Manager.
+ * @reg: Address offset of register
+ * @val: Value read.
+ * Return: INTEL_SIP_SMC_STATUS_OK (0) on success
+ * INTEL_SIP_SMC_REG_ERROR on error
+ * INTEL_SIP_SMC_RETURN_UNKNOWN_FUNCTION if not supported
+ */
+static int s10_protected_reg_read(void *base,
+ unsigned int reg, unsigned int *val)
+{
+ struct arm_smccc_res result;
+ unsigned long sysmgr_base = (unsigned long)base;
+
+ arm_smccc_smc(INTEL_SIP_SMC_REG_READ, sysmgr_base + reg,
+ 0, 0, 0, 0, 0, 0, &result);
+
+ *val = (unsigned int)result.a1;
+
+ return (int)result.a0;
+}
+
+static struct regmap_config altr_sysmgr_regmap_cfg = {
+ .name = "altr_sysmgr",
+ .reg_bits = 32,
+ .reg_stride = 4,
+ .val_bits = 32,
+ .fast_io = true,
+ .use_single_read = true,
+ .use_single_write = true,
+};
+
+/**
+ * sysmgr_match_phandle
+ * Matching function used by driver_find_device().
+ * Return: True if match is found, otherwise false.
+ */
+static int sysmgr_match_phandle(struct device *dev, void *data)
+{
+ return dev->of_node == (struct device_node *)data;
+}
+
+/**
+ * altr_sysmgr_regmap_lookup_by_phandle
+ * Find the sysmgr previously configured in probe() and return regmap property.
+ * Return: regmap if found or error if not found.
+ */
+struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np,
+ const char *property)
+{
+ struct device *dev;
+ struct altr_sysmgr *sysmgr;
+ struct device_node *sysmgr_np;
+
+ if (property)
+ sysmgr_np = of_parse_phandle(np, property, 0);
+ else
+ sysmgr_np = np;
+
+ if (!sysmgr_np)
+ return ERR_PTR(-ENODEV);
+
+ dev = driver_find_device(&altr_sysmgr_driver.driver, NULL,
+ (void *)sysmgr_np, sysmgr_match_phandle);
+ if (!dev)
+ return ERR_PTR(-EPROBE_DEFER);
+
+ sysmgr = dev_get_drvdata(dev);
+
+ return sysmgr->regmap;
+}
+EXPORT_SYMBOL_GPL(altr_sysmgr_regmap_lookup_by_phandle);
+
+static int sysmgr_probe(struct platform_device *pdev)
+{
+ struct altr_sysmgr *sysmgr;
+ struct regmap *regmap;
+ struct resource *res;
+ struct regmap_config sysmgr_config = altr_sysmgr_regmap_cfg;
+ struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
+
+ sysmgr = devm_kzalloc(dev, sizeof(*sysmgr), GFP_KERNEL);
+ if (!sysmgr)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENOENT;
+
+ sysmgr_config.max_register = resource_size(res) -
+ sysmgr_config.reg_stride;
+ if (of_device_is_compatible(np, "altr,sys-mgr-s10")) {
+ /* Need physical address for SMCC call */
+ sysmgr->base = (resource_size_t *)res->start;
+ sysmgr_config.reg_read = s10_protected_reg_read;
+ sysmgr_config.reg_write = s10_protected_reg_write;
+
+ regmap = devm_regmap_init(dev, NULL, sysmgr->base,
+ &sysmgr_config);
+ } else {
+ sysmgr->base = devm_ioremap(dev, res->start,
+ resource_size(res));
+ if (!sysmgr->base)
+ return -ENOMEM;
+
+ sysmgr_config.max_register = res->end - res->start - 3;
+ regmap = devm_regmap_init_mmio(dev, sysmgr->base,
+ &sysmgr_config);
+ }
+
+ if (IS_ERR(regmap)) {
+ pr_err("regmap init failed\n");
+ return PTR_ERR(regmap);
+ }
+
+ sysmgr->regmap = regmap;
+
+ platform_set_drvdata(pdev, sysmgr);
+
+ return 0;
+}
+
+static const struct of_device_id altr_sysmgr_of_match[] = {
+ { .compatible = "altr,sys-mgr" },
+ { .compatible = "altr,sys-mgr-s10" },
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_sysmgr_of_match);
+
+static struct platform_driver altr_sysmgr_driver = {
+ .probe = sysmgr_probe,
+ .driver = {
+ .name = "altr,system_manager",
+ .of_match_table = altr_sysmgr_of_match,
+ },
+};
+
+static int __init altr_sysmgr_init(void)
+{
+ return platform_driver_register(&altr_sysmgr_driver);
+}
+core_initcall(altr_sysmgr_init);
+
+static void __exit altr_sysmgr_exit(void)
+{
+ platform_driver_unregister(&altr_sysmgr_driver);
+}
+module_exit(altr_sysmgr_exit);
+
+MODULE_AUTHOR("Thor Thayer <>");
+MODULE_DESCRIPTION("SOCFPGA System Manager driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/mfd/altera-sysmgr.h b/include/linux/mfd/altera-sysmgr.h
new file mode 100644
index 000000000000..b1ef11a83872
--- /dev/null
+++ b/include/linux/mfd/altera-sysmgr.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2012 Freescale Semiconductor, Inc.
+ * Copyright (C) 2012 Linaro Ltd.
+ */
+
+#ifndef __LINUX_MFD_ALTERA_SYSMGR_H__
+#define __LINUX_MFD_ALTERA_SYSMGR_H__
+
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/firmware/intel/stratix10-smc.h>
+
+struct device_node;
+
+#ifdef CONFIG_MFD_ALTERA_SYSMGR
+struct regmap *altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np,
+ const char *property);
+#else
+static inline struct regmap *
+altr_sysmgr_regmap_lookup_by_phandle(struct device_node *np,
+ const char *property)
+{
+ return ERR_PTR(-ENOTSUPP);
+}
+#endif
+
+#endif /* __LINUX_MFD_ALTERA_SYSMGR_H__ */
--
2.7.4
^ permalink raw reply related
* [PATCHv3 3/6] ARM: socfpga_defconfig: Enable CONFIG_MFD_ALTERA_SYSMGR
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
Add System Manager driver by default for SOCFPGA ARM32 platforms.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2-3 No change
---
arch/arm/configs/socfpga_defconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm/configs/socfpga_defconfig b/arch/arm/configs/socfpga_defconfig
index 371fca4e1ab7..c510a32f9f0d 100644
--- a/arch/arm/configs/socfpga_defconfig
+++ b/arch/arm/configs/socfpga_defconfig
@@ -109,6 +109,7 @@ CONFIG_SENSORS_LTC2978_REGULATOR=y
CONFIG_WATCHDOG=y
CONFIG_DW_WATCHDOG=y
CONFIG_MFD_ALTERA_A10SR=y
+CONFIG_MFD_ALTERA_SYSMGR=y
CONFIG_MFD_STMPE=y
CONFIG_REGULATOR=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
--
2.7.4
^ permalink raw reply related
* [PATCHv3 5/6] net: stmmac: socfpga: Use shared System Manager driver
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
The ARM64 System Manager requires a different method of reading
the System Manager than ARM32. A new System Manager driver was
created to steer ARM32 System Manager calls to regmap_mmio and
ARM64 System Manager calls to the new access method.
Convert from syscon to the shared System Manager driver so that
both ARM64 and ARM32 are supported.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2 No change to code. Update commit message.
v3 Remove the unused syscon.h header.
---
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index 5b3b06a0a3bf..d466e33635b0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -15,7 +15,7 @@
* Adopted from dwmac-sti.c
*/
-#include <linux/mfd/syscon.h>
+#include <linux/mfd/altera-sysmgr.h>
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_net.h>
@@ -114,7 +114,8 @@ static int socfpga_dwmac_parse_data(struct socfpga_dwmac *dwmac, struct device *
dwmac->interface = of_get_phy_mode(np);
- sys_mgr_base_addr = syscon_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon");
+ sys_mgr_base_addr =
+ altr_sysmgr_regmap_lookup_by_phandle(np, "altr,sysmgr-syscon");
if (IS_ERR(sys_mgr_base_addr)) {
dev_info(dev, "No sysmgr-syscon node found\n");
return PTR_ERR(sys_mgr_base_addr);
--
2.7.4
^ permalink raw reply related
* [PATCHv3 4/6] arm64: defconfig: Enable CONFIG_MFD_ALTERA_SYSMGR
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
Enable the Stratix10 System Manager by default.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2-3 No change
---
arch/arm64/configs/defconfig | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index c8432e24207e..48a312126cf7 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig
@@ -417,6 +417,7 @@ CONFIG_MESON_WATCHDOG=m
CONFIG_RENESAS_WDT=y
CONFIG_UNIPHIER_WATCHDOG=y
CONFIG_BCM2835_WDT=y
+CONFIG_MFD_ALTERA_SYSMGR=y
CONFIG_MFD_BD9571MWV=y
CONFIG_MFD_AXP20X_I2C=y
CONFIG_MFD_AXP20X_RSB=y
--
2.7.4
^ permalink raw reply related
* [PATCHv3 6/6] arm64: dts: stratix10: New System Manager compatible
From: thor.thayer @ 2019-01-28 22:14 UTC (permalink / raw)
To: lee.jones, arnd, dinguyen, linux, catalin.marinas, will.deacon,
peppe.cavallaro, alexandre.torgue, joabreu
Cc: davem, mcoquelin.stm32, mchehab+samsung, mark.rutland,
bjorn.andersson, olof, devicetree, linux-kernel, linux-arm-kernel,
netdev, Thor Thayer
In-Reply-To: <1548713655-25940-1-git-send-email-thor.thayer@linux.intel.com>
From: Thor Thayer <thor.thayer@linux.intel.com>
Use the new compatible string defined for the Stratix10
System Manager. Remove syscon since it is not correct
on this platform. Use "altr,sys-mgr" as the non-specific
fallback compatible.
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
---
v2 New. Use new Stratix10 System Manager compatible
v3 Use "altr,sys-mgr" as the non-specific compatible.
---
arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
index b2c9bb664595..18e4e54db0bb 100644
--- a/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
+++ b/arch/arm64/boot/dts/altera/socfpga_stratix10.dtsi
@@ -363,7 +363,7 @@
};
sysmgr: sysmgr@ffd12000 {
- compatible = "altr,sys-mgr", "syscon";
+ compatible = "altr,sys-mgr-s10","altr,sys-mgr";
reg = <0xffd12000 0x228>;
};
--
2.7.4
^ permalink raw reply related
* Re: r8169 Driver - Poor Network Performance Since Kernel 4.19
From: Peter Ceiley @ 2019-01-28 22:10 UTC (permalink / raw)
To: Heiner Kallweit, Realtek linux nic maintainers; +Cc: netdev
In-Reply-To: <a123dc94-163a-36c1-5a20-a957f05430d3@gmail.com>
Hi Heiner,
Thanks for getting back to me.
No, I don't use jumbo packets.
Bandwidth is *generally* good, and iperf results to my NAS provide
over 900 Mbits/s in both circumstances. The issue seems to appear when
establishing a connection and is most notable, for example, on my
mounted NFS shares where it takes seconds (up to 10's of seconds on
larger directories) to list the contents of each directory. Once a
transfer begins on a file, I appear to get good bandwidth.
I'm unsure of the best scientific data to provide you in order to
troubleshoot this issue. Running the following
netstat -s |grep retransmitted
shows a steady increase in retransmitted segments each time I list the
contents of a remote directory, for example, running 'ls' on a
directory containing 345 media files did the following using kernel
4.19.18:
increased retransmitted segments by 21 and the 'time' command showed
the following:
real 0m19.867s
user 0m0.012s
sys 0m0.036s
The same command shows no retransmitted segments running kernel
4.18.16 and 'time' showed:
real 0m0.300s
user 0m0.004s
sys 0m0.007s
ifconfig does not show any RX/TX errors nor dropped packets in either case.
dmesg XID:
[ 2.979984] r8169 0000:03:00.0 eth0: RTL8168g/8111g,
f8:b1:56:fe:67:e0, XID 4c000800, IRQ 32
# lspci -vv
03:00.0 Ethernet controller: Realtek Semiconductor Co., Ltd.
RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller (rev 0c)
Subsystem: Dell RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B- DisINTx+
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort-
<TAbort- <MAbort- >SERR- <PERR- INTx-
Latency: 0, Cache Line Size: 64 bytes
Interrupt: pin A routed to IRQ 19
Region 0: I/O ports at d000 [size=256]
Region 2: Memory at f7b00000 (64-bit, non-prefetchable) [size=4K]
Region 4: Memory at f2100000 (64-bit, prefetchable) [size=16K]
Capabilities: [40] Power Management version 3
Flags: PMEClk- DSI- D1+ D2+ AuxCurrent=375mA
PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 NoSoftRst+ PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [50] MSI: Enable- Count=1/1 Maskable- 64bit+
Address: 0000000000000000 Data: 0000
Capabilities: [70] Express (v2) Endpoint, MSI 01
DevCap: MaxPayload 128 bytes, PhantFunc 0, Latency L0s
<512ns, L1 <64us
ExtTag- AttnBtn- AttnInd- PwrInd- RBE+ FLReset-
SlotPowerLimit 10.000W
DevCtl: CorrErr- NonFatalErr- FatalErr- UnsupReq-
RlxdOrd- ExtTag- PhantFunc- AuxPwr- NoSnoop-
MaxPayload 128 bytes, MaxReadReq 4096 bytes
DevSta: CorrErr+ NonFatalErr- FatalErr- UnsupReq- AuxPwr+ TransPend-
LnkCap: Port #0, Speed 2.5GT/s, Width x1, ASPM L0s L1, Exit
Latency L0s unlimited, L1 <64us
ClockPM+ Surprise- LLActRep- BwNot- ASPMOptComp+
LnkCtl: ASPM L1 Enabled; RCB 64 bytes Disabled- CommClk+
ExtSynch- ClockPM+ AutWidDis- BWInt- AutBWInt-
LnkSta: Speed 2.5GT/s (ok), Width x1 (ok)
TrErr- Train- SlotClk+ DLActive- BWMgmt- ABWMgmt-
DevCap2: Completion Timeout: Range ABCD, TimeoutDis+, LTR+,
OBFF Via message/WAKE#
AtomicOpsCap: 32bit- 64bit- 128bitCAS-
DevCtl2: Completion Timeout: 50us to 50ms, TimeoutDis-, LTR+,
OBFF Disabled
AtomicOpsCtl: ReqEn-
LnkCtl2: Target Link Speed: 2.5GT/s, EnterCompliance- SpeedDis-
Transmit Margin: Normal Operating Range,
EnterModifiedCompliance- ComplianceSOS-
Compliance De-emphasis: -6dB
LnkSta2: Current De-emphasis Level: -6dB,
EqualizationComplete-, EqualizationPhase1-
EqualizationPhase2-, EqualizationPhase3-, LinkEqualizationRequest-
Capabilities: [b0] MSI-X: Enable+ Count=4 Masked-
Vector table: BAR=4 offset=00000000
PBA: BAR=4 offset=00000800
Capabilities: [d0] Vital Product Data
pcilib: sysfs_read_vpd: read failed: Input/output error
Not readable
Capabilities: [100 v1] Advanced Error Reporting
UESta: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt-
RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
UEMsk: DLP- SDES- TLP- FCP- CmpltTO- CmpltAbrt- UnxCmplt-
RxOF- MalfTLP- ECRC- UnsupReq- ACSViol-
UESvrt: DLP+ SDES+ TLP- FCP+ CmpltTO- CmpltAbrt- UnxCmplt-
RxOF+ MalfTLP+ ECRC- UnsupReq- ACSViol-
CESta: RxErr+ BadTLP+ BadDLLP+ Rollover- Timeout+ AdvNonFatalErr-
CEMsk: RxErr- BadTLP- BadDLLP- Rollover- Timeout- AdvNonFatalErr+
AERCap: First Error Pointer: 00, ECRCGenCap+ ECRCGenEn-
ECRCChkCap+ ECRCChkEn-
MultHdrRecCap- MultHdrRecEn- TLPPfxPres- HdrLogCap-
HeaderLog: 00000000 00000000 00000000 00000000
Capabilities: [140 v1] Virtual Channel
Caps: LPEVC=0 RefClk=100ns PATEntryBits=1
Arb: Fixed- WRR32- WRR64- WRR128-
Ctrl: ArbSelect=Fixed
Status: InProgress-
VC0: Caps: PATOffset=00 MaxTimeSlots=1 RejSnoopTrans-
Arb: Fixed- WRR32- WRR64- WRR128- TWRR128- WRR256-
Ctrl: Enable+ ID=0 ArbSelect=Fixed TC/VC=01
Status: NegoPending- InProgress-
Capabilities: [160 v1] Device Serial Number 01-00-00-00-68-4c-e0-00
Capabilities: [170 v1] Latency Tolerance Reporting
Max snoop latency: 71680ns
Max no snoop latency: 71680ns
Kernel driver in use: r8169
Kernel modules: r8169
Please let me know if you have any other ideas in terms of testing.
Thanks!
Peter.
On Tue, 29 Jan 2019 at 05:28, Heiner Kallweit <hkallweit1@gmail.com> wrote:
>
> On 28.01.2019 12:13, Peter Ceiley wrote:
> > Hi,
> >
> > I have been experiencing very poor network performance since Kernel
> > 4.19 and I'm confident it's related to the r8169 driver.
> >
> > I have no issue with kernel versions 4.18 and prior. I am experiencing
> > this issue in kernels 4.19 and 4.20 (currently running/testing with
> > 4.20.4 & 4.19.18).
> >
> > If someone could guide me in the right direction, I'm happy to help
> > troubleshoot this issue. Note that I have been keeping an eye on one
> > issue related to loading of the PHY driver, however, my symptoms
> > differ in that I still have a network connection. I have attempted to
> > reload the driver on a running system, but this does not improve the
> > situation.
> >
> > Using the proprietary r8168 driver returns my device to proper working order.
> >
> > lshw shows:
> > description: Ethernet interface
> > product: RTL8111/8168/8411 PCI Express Gigabit Ethernet Controller
> > vendor: Realtek Semiconductor Co., Ltd.
> > physical id: 0
> > bus info: pci@0000:03:00.0
> > logical name: enp3s0
> > version: 0c
> > serial:
> > size: 1Gbit/s
> > capacity: 1Gbit/s
> > width: 64 bits
> > clock: 33MHz
> > capabilities: pm msi pciexpress msix vpd bus_master cap_list
> > ethernet physical tp aui bnc mii fibre 10bt 10bt-fd 100bt 100bt-fd
> > 1000bt-fd autonegotiation
> > configuration: autonegotiation=on broadcast=yes driver=r8169
> > duplex=full firmware=rtl8168g-2_0.0.1 02/06/13 ip=192.168.1.25
> > latency=0 link=yes multicast=yes port=MII speed=1Gbit/s
> > resources: irq:19 ioport:d000(size=256)
> > memory:f7b00000-f7b00fff memory:f2100000-f2103fff
> >
> > Kind Regards,
> >
> > Peter.
> >
> Hi Peter,
>
> the description "poor network performance" is quite vague, therefore:
>
> - Can you provide any measurements?
> - iperf results before and after
> - statistics about dropped packets (rx and/or tx)
> - Do you use jumbo packets?
>
> Also helpful would be a "lspci -vv" output for the network card and
> the dmesg output line with the chip XID.
>
> Heiner
^ permalink raw reply
* Re: [PATCH 2/2] iwlwifi: Use struct_size() in kzalloc
From: Joe Perches @ 2019-01-28 21:57 UTC (permalink / raw)
To: YueHaibing, johannes.berg, emmanuel.grumbach, luciano.coelho,
linuxwifi, kvalo
Cc: linux-kernel, netdev, linux-wireless
In-Reply-To: <20190128064432.17576-3-yuehaibing@huawei.com>
On Mon, 2019-01-28 at 14:44 +0800, YueHaibing wrote:
> Use struct_size() in kzalloc instead of the 'regd_to_copy'
There is also a use above that, in the same function,
that could be converted as well.
/* build a regdomain rule for every valid channel */
size_of_regd =
sizeof(struct ieee80211_regdomain) +
num_of_ch * sizeof(struct ieee80211_reg_rule);
regd = kzalloc(size_of_regd, GFP_KERNEL);
if (!regd)
return ERR_PTR(-ENOMEM);
> diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
[]
> @@ -1093,7 +1093,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
> const u8 *nvm_chan = cfg->nvm_type == IWL_NVM_EXT ?
> iwl_ext_nvm_channels : iwl_nvm_channels;
> struct ieee80211_regdomain *regd, *copy_rd;
> - int size_of_regd, regd_to_copy;
> + int size_of_regd;
> struct ieee80211_reg_rule *rule;
> struct regdb_ptrs *regdb_ptrs;
> enum nl80211_band band;
> @@ -1193,10 +1193,8 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
> * Narrow down regdom for unused regulatory rules to prevent hole
> * between reg rules to wmm rules.
> */
> - regd_to_copy = sizeof(struct ieee80211_regdomain) +
> - valid_rules * sizeof(struct ieee80211_reg_rule);
> -
> - copy_rd = kmemdup(regd, regd_to_copy, GFP_KERNEL);
> + copy_rd = kmemdup(regd, struct_size(regd, reg_rules, valid_rules),
> + GFP_KERNEL);
> if (!copy_rd)
> copy_rd = ERR_PTR(-ENOMEM);
>
^ permalink raw reply
* Re: bpf memory model. Was: [PATCH v4 bpf-next 1/9] bpf: introduce bpf_spin_lock
From: Alexei Starovoitov @ 2019-01-28 21:56 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Alexei Starovoitov, davem, daniel, jakub.kicinski, netdev,
kernel-team, mingo, will.deacon, Paul McKenney, jannh
In-Reply-To: <20190128092408.GD28467@hirez.programming.kicks-ass.net>
On Mon, Jan 28, 2019 at 10:24:08AM +0100, Peter Zijlstra wrote:
> On Fri, Jan 25, 2019 at 04:17:26PM -0800, Alexei Starovoitov wrote:
> > On Fri, Jan 25, 2019 at 11:23:12AM +0100, Peter Zijlstra wrote:
> > > On Thu, Jan 24, 2019 at 03:58:59PM -0800, Alexei Starovoitov wrote:
> > > > On Thu, Jan 24, 2019 at 07:01:09PM +0100, Peter Zijlstra wrote:
> > >
> > > > > And this would again be the moment where I go pester you about the BPF
> > > > > memory model :-)
> > > >
> > > > hehe :)
> > > > How do you propose to define it in a way that it applies to all archs
> > > > and yet doesn't penalize x86 ?
> > > > "Assume minimum execution ordering model" the way kernel does
> > > > unfortunately is not usable, since bpf doesn't have the luxury
> > > > of using nice #defines that convert into nops on x86.
> > >
> > > Why not? Surely the JIT can fix it up? That is, suppose you were to have
> > > smp_rmb() as a eBPF instruction then the JIT (which knows what
> > > architecture it is targeting) can simply avoid emitting instructions for
> > > it.
> >
> > I'm all for adding new instructions that solve real use cases.
> > imo bpf_spin_lock() is the first step in helping with concurrency.
> > At plumbers conference we agreed to add new sync_fetch_and_add()
> > and xchg() instructions. That's a second step.
> > smp_rmb/wmb/mb should be added as well.
> > JITs will patch them depending on architecture.
> >
> > What I want to avoid is to define the whole execution ordering model upfront.
> > We cannot say that BPF ISA is weakly ordered like alpha.
> > Most of the bpf progs are written and running on x86. We shouldn't
> > twist bpf developer's arm by artificially relaxing memory model.
> > BPF memory model is equal to memory model of underlying architecture.
> > What we can do is to make bpf progs a bit more portable with
> > smp_rmb instructions, but we must not force weak execution on the developer.
>
> Well, I agree with only introducing bits you actually need, and my
> smp_rmb() example might have been poorly chosen, smp_load_acquire() /
> smp_store_release() might have been a far more useful example.
>
> But I disagree with the last part; we have to pick a model now;
> otherwise you'll paint yourself into a corner.
>
> Also; Alpha isn't very relevant these days; however ARM64 does seem to
> be gaining a lot of attention and that is very much a weak architecture.
> Adding strongly ordered assumptions to BPF now, will penalize them in
> the long run.
arm64 is gaining attention just like riscV is gaining it too.
BPF jit for arm64 is very solid, while BPF jit for riscV is being worked on.
BPF is not picking sides in CPU HW and ISA battles.
Memory model is CPU HW design decision. BPF ISA cannot dictate HW design.
We're not saying today that BPF is strongly ordered.
BPF load/stores are behaving differently on x86 vs arm64.
We can add new instructions, but we cannot 'define' how load/stores behave
from memory model perspective.
For example, take atomicity of single byte load/store.
Not all archs have them atomic, but we cannot say to bpf programmers
to always assume non-atomic byte loads.
> > > Similarly; could something like this not also help with the spinlock
> > > thing? Use that generic test-and-set thing for the interpreter, but
> > > provide a better implementation in the JIT?
> >
> > Maybe eventually. We can add cmpxchg insn, but the verifier still
> > doesn't support loops. We made a lot of progress in bounded loop research
> > over the last 2 years, but loops in bpf are still a year away.
> > We considered adding 'bpf_spin_lock' as a new instruction instead of helper call,
> > but that approach has a lot of negatives, so we went with the helper.
>
> Ah, but the loop won't be in the BPF program itself. The BPF program
> would only have had the BPF_SPIN_LOCK instruction, the JIT then emits
> code similar to queued_spin_lock()/queued_spin_unlock() (or calls to
> out-of-line versions of them).
As I said, we considered exactly that, and such an approach has a lot of downsides
compared with the helper approach.
Pretty much every time a new feature is added we evaluate whether it
should be a new instruction or a new helper. 99% of the time we go with a new helper.
> There isn't anything that mandates the JIT uses the exact same locking
> routines the interpreter does, is there?
sure. This bpf_spin_lock() helper can be optimized whichever way the kernel wants.
Like bpf_map_lookup_elem() call is _inlined_ by the verifier for certain map types.
JITs don't even need to do anything. It looks like a function call from the bpf prog's
point of view, but in JITed code it is a sequence of native instructions.
Say tomorrow we find out that bpf_prog->bpf_spin_lock()->queued_spin_lock()
takes too much time then we can inline fast path of queued_spin_lock
directly into bpf prog and save function call cost.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox