* [PATCH net-next] mlxsw: spectrum: Extend to support Spectrum-3 ASIC
From: Ido Schimmel @ 2019-08-07 10:42 UTC (permalink / raw)
To: netdev; +Cc: davem, jiri, petrm, mlxsw, Ido Schimmel
From: Jiri Pirko <jiri@mellanox.com>
Extend the existing driver for Spectrum and Spectrum-2 ASICs
to support the Spectrum-3 ASIC as well.
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
drivers/net/ethernet/mellanox/mlxsw/Kconfig | 6 +-
drivers/net/ethernet/mellanox/mlxsw/pci.h | 1 +
.../net/ethernet/mellanox/mlxsw/spectrum.c | 55 +++++++++++++++++++
3 files changed, 59 insertions(+), 3 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 06c80343d9ed..f458fd1ce9f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -71,7 +71,7 @@ config MLXSW_SWITCHX2
module will be called mlxsw_switchx2.
config MLXSW_SPECTRUM
- tristate "Mellanox Technologies Spectrum support"
+ tristate "Mellanox Technologies Spectrum family support"
depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
depends on PSAMPLE || PSAMPLE=n
depends on BRIDGE || BRIDGE=n
@@ -87,8 +87,8 @@ config MLXSW_SPECTRUM
select NET_PTP_CLASSIFY if PTP_1588_CLOCK
default m
---help---
- This driver supports Mellanox Technologies Spectrum Ethernet
- Switch ASICs.
+ This driver supports Mellanox Technologies
+ Spectrum/Spectrum-2/Spectrum-3 Ethernet Switch ASICs.
To compile this driver as a module, choose M here: the
module will be called mlxsw_spectrum.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index 946339e13eb9..5b1323645a5d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -9,6 +9,7 @@
#define PCI_DEVICE_ID_MELLANOX_SWITCHX2 0xc738
#define PCI_DEVICE_ID_MELLANOX_SPECTRUM 0xcb84
#define PCI_DEVICE_ID_MELLANOX_SPECTRUM2 0xcf6c
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM3 0xcf70
#define PCI_DEVICE_ID_MELLANOX_SWITCHIB 0xcb20
#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2 0xcf08
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5a8e94c0a95a..389861ece418 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -65,6 +65,7 @@ static const struct mlxsw_fw_rev mlxsw_sp1_fw_rev = {
static const char mlxsw_sp1_driver_name[] = "mlxsw_spectrum";
static const char mlxsw_sp2_driver_name[] = "mlxsw_spectrum2";
+static const char mlxsw_sp3_driver_name[] = "mlxsw_spectrum3";
static const char mlxsw_sp_driver_version[] = "1.0";
static const unsigned char mlxsw_sp1_mac_mask[ETH_ALEN] = {
@@ -5290,6 +5291,35 @@ static struct mlxsw_driver mlxsw_sp2_driver = {
.res_query_enabled = true,
};
+static struct mlxsw_driver mlxsw_sp3_driver = {
+ .kind = mlxsw_sp3_driver_name,
+ .priv_size = sizeof(struct mlxsw_sp),
+ .init = mlxsw_sp2_init,
+ .fini = mlxsw_sp_fini,
+ .basic_trap_groups_set = mlxsw_sp_basic_trap_groups_set,
+ .port_split = mlxsw_sp_port_split,
+ .port_unsplit = mlxsw_sp_port_unsplit,
+ .sb_pool_get = mlxsw_sp_sb_pool_get,
+ .sb_pool_set = mlxsw_sp_sb_pool_set,
+ .sb_port_pool_get = mlxsw_sp_sb_port_pool_get,
+ .sb_port_pool_set = mlxsw_sp_sb_port_pool_set,
+ .sb_tc_pool_bind_get = mlxsw_sp_sb_tc_pool_bind_get,
+ .sb_tc_pool_bind_set = mlxsw_sp_sb_tc_pool_bind_set,
+ .sb_occ_snapshot = mlxsw_sp_sb_occ_snapshot,
+ .sb_occ_max_clear = mlxsw_sp_sb_occ_max_clear,
+ .sb_occ_port_pool_get = mlxsw_sp_sb_occ_port_pool_get,
+ .sb_occ_tc_port_bind_get = mlxsw_sp_sb_occ_tc_port_bind_get,
+ .flash_update = mlxsw_sp_flash_update,
+ .txhdr_construct = mlxsw_sp_txhdr_construct,
+ .resources_register = mlxsw_sp2_resources_register,
+ .params_register = mlxsw_sp2_params_register,
+ .params_unregister = mlxsw_sp2_params_unregister,
+ .ptp_transmitted = mlxsw_sp_ptp_transmitted,
+ .txhdr_len = MLXSW_TXHDR_LEN,
+ .profile = &mlxsw_sp2_config_profile,
+ .res_query_enabled = true,
+};
+
bool mlxsw_sp_port_dev_check(const struct net_device *dev)
{
return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
@@ -6324,6 +6354,16 @@ static struct pci_driver mlxsw_sp2_pci_driver = {
.id_table = mlxsw_sp2_pci_id_table,
};
+static const struct pci_device_id mlxsw_sp3_pci_id_table[] = {
+ {PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM3), 0},
+ {0, },
+};
+
+static struct pci_driver mlxsw_sp3_pci_driver = {
+ .name = mlxsw_sp3_driver_name,
+ .id_table = mlxsw_sp3_pci_id_table,
+};
+
static int __init mlxsw_sp_module_init(void)
{
int err;
@@ -6339,6 +6379,10 @@ static int __init mlxsw_sp_module_init(void)
if (err)
goto err_sp2_core_driver_register;
+ err = mlxsw_core_driver_register(&mlxsw_sp3_driver);
+ if (err)
+ goto err_sp3_core_driver_register;
+
err = mlxsw_pci_driver_register(&mlxsw_sp1_pci_driver);
if (err)
goto err_sp1_pci_driver_register;
@@ -6347,11 +6391,19 @@ static int __init mlxsw_sp_module_init(void)
if (err)
goto err_sp2_pci_driver_register;
+ err = mlxsw_pci_driver_register(&mlxsw_sp3_pci_driver);
+ if (err)
+ goto err_sp3_pci_driver_register;
+
return 0;
+err_sp3_pci_driver_register:
+ mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
err_sp2_pci_driver_register:
mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
err_sp1_pci_driver_register:
+ mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
+err_sp3_core_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
err_sp2_core_driver_register:
mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
@@ -6363,8 +6415,10 @@ static int __init mlxsw_sp_module_init(void)
static void __exit mlxsw_sp_module_exit(void)
{
+ mlxsw_pci_driver_unregister(&mlxsw_sp3_pci_driver);
mlxsw_pci_driver_unregister(&mlxsw_sp2_pci_driver);
mlxsw_pci_driver_unregister(&mlxsw_sp1_pci_driver);
+ mlxsw_core_driver_unregister(&mlxsw_sp3_driver);
mlxsw_core_driver_unregister(&mlxsw_sp2_driver);
mlxsw_core_driver_unregister(&mlxsw_sp1_driver);
unregister_inet6addr_validator_notifier(&mlxsw_sp_inet6addr_valid_nb);
@@ -6379,4 +6433,5 @@ MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
MODULE_DESCRIPTION("Mellanox Spectrum driver");
MODULE_DEVICE_TABLE(pci, mlxsw_sp1_pci_id_table);
MODULE_DEVICE_TABLE(pci, mlxsw_sp2_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp3_pci_id_table);
MODULE_FIRMWARE(MLXSW_SP1_FW_FILENAME);
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 10/10] drop_monitor: Expose tail drop counter
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
Previous patch made the length of the per-CPU skb drop list
configurable. Expose a counter that shows how many packets could not be
enqueued to this list.
This allows users to determine the desired queue length.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 10 +++
net/core/drop_monitor.c | 101 +++++++++++++++++++++++++++++++
2 files changed, 111 insertions(+)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 8658bcd07e0e..a2d253683237 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -56,6 +56,8 @@ enum {
NET_DM_CMD_PACKET_ALERT,
NET_DM_CMD_CONFIG_GET,
NET_DM_CMD_CONFIG_NEW,
+ NET_DM_CMD_STATS_GET,
+ NET_DM_CMD_STATS_NEW,
_NET_DM_CMD_MAX,
};
@@ -79,6 +81,7 @@ enum net_dm_attr {
NET_DM_ATTR_TRUNC_LEN, /* u32 */
NET_DM_ATTR_ORIG_LEN, /* u32 */
NET_DM_ATTR_QUEUE_LEN, /* u32 */
+ NET_DM_ATTR_STATS, /* nested */
__NET_DM_ATTR_MAX,
NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
@@ -102,4 +105,11 @@ enum {
NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
};
+enum {
+ NET_DM_ATTR_STATS_DROPPED, /* u64 */
+
+ __NET_DM_ATTR_STATS_MAX,
+ NET_DM_ATTR_STATS_MAX = __NET_DM_ATTR_STATS_MAX - 1
+};
+
#endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index c9b68a093e0f..59c57154485c 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -51,12 +51,18 @@ static int trace_state = TRACE_OFF;
*/
static DEFINE_MUTEX(net_dm_mutex);
+struct net_dm_stats {
+ u64 dropped;
+ struct u64_stats_sync syncp;
+};
+
struct per_cpu_dm_data {
spinlock_t lock; /* Protects 'skb' and 'send_timer' */
struct sk_buff *skb;
struct sk_buff_head drop_queue;
struct work_struct dm_alert_work;
struct timer_list send_timer;
+ struct net_dm_stats stats;
};
struct dm_hw_stat_delta {
@@ -300,6 +306,9 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
unlock_free:
spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+ u64_stats_update_begin(&data->stats.syncp);
+ data->stats.dropped++;
+ u64_stats_update_end(&data->stats.syncp);
consume_skb(nskb);
}
@@ -727,6 +736,93 @@ static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
return rc;
}
+static void net_dm_stats_read(struct net_dm_stats *stats)
+{
+ int cpu;
+
+ memset(stats, 0, sizeof(*stats));
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct net_dm_stats *cpu_stats = &data->stats;
+ unsigned int start;
+ u64 dropped;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+ dropped = cpu_stats->dropped;
+ } while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+ stats->dropped += dropped;
+ }
+}
+
+static int net_dm_stats_put(struct sk_buff *msg)
+{
+ struct net_dm_stats stats;
+ struct nlattr *attr;
+
+ net_dm_stats_read(&stats);
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+ stats.dropped, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
+{
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ rc = net_dm_stats_put(msg);
+ if (rc)
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ int rc;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rc = net_dm_stats_fill(msg, info);
+ if (rc)
+ goto free_msg;
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ nlmsg_free(msg);
+ return rc;
+}
+
static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
@@ -794,6 +890,10 @@ static const struct genl_ops dropmon_ops[] = {
.cmd = NET_DM_CMD_CONFIG_GET,
.doit = net_dm_cmd_config_get,
},
+ {
+ .cmd = NET_DM_CMD_STATS_GET,
+ .doit = net_dm_cmd_stats_get,
+ },
};
static int net_dm_nl_pre_doit(const struct genl_ops *ops,
@@ -860,6 +960,7 @@ static int __init init_net_drop_monitor(void)
data = &per_cpu(dm_cpu_data, cpu);
spin_lock_init(&data->lock);
skb_queue_head_init(&data->drop_queue);
+ u64_stats_init(&data->stats.syncp);
}
goto out;
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 09/10] drop_monitor: Make drop queue length configurable
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
In packet alert mode, each CPU holds a list of dropped skbs that need to
be processed in process context and sent to user space. To avoid
exhausting the system's memory the maximum length of this queue is
currently set to 1000.
Allow users to tune the length of this queue according to their needs.
The configured length is reported to user space when drop monitor
configuration is queried.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 1 +
net/core/drop_monitor.c | 19 ++++++++++++++++---
2 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 7b15f632c045..8658bcd07e0e 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -78,6 +78,7 @@ enum net_dm_attr {
NET_DM_ATTR_PAD,
NET_DM_ATTR_TRUNC_LEN, /* u32 */
NET_DM_ATTR_ORIG_LEN, /* u32 */
+ NET_DM_ATTR_QUEUE_LEN, /* u32 */
__NET_DM_ATTR_MAX,
NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index f5dfad283fe2..c9b68a093e0f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -78,6 +78,7 @@ static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
static u32 net_dm_trunc_len;
+static u32 net_dm_queue_len = 1000;
struct net_dm_alert_ops {
void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -93,8 +94,6 @@ struct net_dm_skb_cb {
#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
-#define NET_DM_QUEUE_LEN 1000
-
static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
@@ -289,7 +288,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
data = this_cpu_ptr(&dm_cpu_data);
spin_lock_irqsave(&data->drop_queue.lock, flags);
- if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+ if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
__skb_queue_tail(&data->drop_queue, nskb);
else
goto unlock_free;
@@ -638,6 +637,14 @@ static void net_dm_trunc_len_set(struct genl_info *info)
net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
}
+static void net_dm_queue_len_set(struct genl_info *info)
+{
+ if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
+ return;
+
+ net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
+}
+
static int net_dm_cmd_config(struct sk_buff *skb,
struct genl_info *info)
{
@@ -655,6 +662,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
net_dm_trunc_len_set(info);
+ net_dm_queue_len_set(info);
+
return 0;
}
@@ -686,6 +695,9 @@ static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
goto nla_put_failure;
+ if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
return 0;
@@ -758,6 +770,7 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
+ [NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
};
static const struct genl_ops dropmon_ops[] = {
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 08/10] drop_monitor: Add a command to query current configuration
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
Users should be able to query the current configuration of drop monitor
before they start using it. Add a command to query the existing
configuration which currently consists of alert mode and packet
truncation length.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 2 ++
net/core/drop_monitor.c | 48 ++++++++++++++++++++++++++++++++
2 files changed, 50 insertions(+)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 9c7b3ea44ee5..7b15f632c045 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -54,6 +54,8 @@ enum {
NET_DM_CMD_START,
NET_DM_CMD_STOP,
NET_DM_CMD_PACKET_ALERT,
+ NET_DM_CMD_CONFIG_GET,
+ NET_DM_CMD_CONFIG_NEW,
_NET_DM_CMD_MAX,
};
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 440766e1f260..f5dfad283fe2 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -671,6 +671,50 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
return -EOPNOTSUPP;
}
+static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
+ goto nla_put_failure;
+
+ if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ int rc;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ rc = net_dm_config_fill(msg, info);
+ if (rc)
+ goto free_msg;
+
+ return genlmsg_reply(msg, info);
+
+free_msg:
+ nlmsg_free(msg);
+ return rc;
+}
+
static int dropmon_net_event(struct notifier_block *ev_block,
unsigned long event, void *ptr)
{
@@ -733,6 +777,10 @@ static const struct genl_ops dropmon_ops[] = {
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_trace,
},
+ {
+ .cmd = NET_DM_CMD_CONFIG_GET,
+ .doit = net_dm_cmd_config_get,
+ },
};
static int net_dm_nl_pre_doit(const struct genl_ops *ops,
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 07/10] drop_monitor: Allow truncation of dropped packets
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
When sending dropped packets to user space it is not always necessary to
copy the entire packet as usually only the headers are of interest.
Allow the user to specify the truncation length and add the original length
of the packet as additional metadata to the netlink message.
By default no truncation is performed.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 2 ++
net/core/drop_monitor.c | 19 +++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 22c6108dcfd4..9c7b3ea44ee5 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -74,6 +74,8 @@ enum net_dm_attr {
NET_DM_ATTR_TIMESTAMP, /* struct timespec */
NET_DM_ATTR_PAYLOAD, /* binary */
NET_DM_ATTR_PAD,
+ NET_DM_ATTR_TRUNC_LEN, /* u32 */
+ NET_DM_ATTR_ORIG_LEN, /* u32 */
__NET_DM_ATTR_MAX,
NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 5fa0b34033d0..440766e1f260 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -77,6 +77,7 @@ static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);
static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+static u32 net_dm_trunc_len;
struct net_dm_alert_ops {
void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -334,6 +335,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
net_dm_in_port_size() +
/* NET_DM_ATTR_TIMESTAMP */
nla_total_size(sizeof(struct timespec)) +
+ /* NET_DM_ATTR_ORIG_LEN */
+ nla_total_size(sizeof(u32)) +
/* NET_DM_ATTR_PAYLOAD */
nla_total_size(payload_len);
}
@@ -389,6 +392,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
goto nla_put_failure;
+ if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+ goto nla_put_failure;
+
if (!payload_len)
goto out;
@@ -424,6 +430,8 @@ static void net_dm_packet_report(struct sk_buff *skb)
/* Ensure packet fits inside a single netlink attribute */
payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+ if (net_dm_trunc_len)
+ payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
if (!msg)
@@ -622,6 +630,14 @@ static int net_dm_alert_mode_set(struct genl_info *info)
return 0;
}
+static void net_dm_trunc_len_set(struct genl_info *info)
+{
+ if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
+ return;
+
+ net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
+}
+
static int net_dm_cmd_config(struct sk_buff *skb,
struct genl_info *info)
{
@@ -637,6 +653,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
if (rc)
return rc;
+ net_dm_trunc_len_set(info);
+
return 0;
}
@@ -695,6 +713,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+ [NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
};
static const struct genl_ops dropmon_ops[] = {
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 06/10] drop_monitor: Add packet alert mode
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
So far drop monitor supported only one alert mode in which a summary of
locations in which packets were recently dropped was sent to user space.
This alert mode is sufficient in order to understand that packets were
dropped, but lacks information to perform a more detailed analysis.
Add a new alert mode in which the dropped packet itself is passed to
user space along with metadata: The drop location (as program counter
and resolved symbol), ingress netdevice and drop timestamp. More
metadata can be added in the future.
To avoid performing expensive operations in the context in which
kfree_skb() is invoked (can be hard IRQ), the dropped skb is cloned and
queued on a per-CPU skb drop list. Then, in process context the netlink
message is allocated, prepared and finally sent to user space.
The per-CPU skb drop list is limited to 1000 skbs to prevent exhausting
the system's memory. Subsequent patches will make this limit
configurable and also add a counter that indicates how many skbs were
tail dropped.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 26 +++
net/core/drop_monitor.c | 275 ++++++++++++++++++++++++++++++-
2 files changed, 299 insertions(+), 2 deletions(-)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 0fecdedeb6ca..22c6108dcfd4 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -53,6 +53,7 @@ enum {
NET_DM_CMD_CONFIG,
NET_DM_CMD_START,
NET_DM_CMD_STOP,
+ NET_DM_CMD_PACKET_ALERT,
_NET_DM_CMD_MAX,
};
@@ -63,12 +64,37 @@ enum {
*/
#define NET_DM_GRP_ALERT 1
+enum net_dm_attr {
+ NET_DM_ATTR_UNSPEC,
+
+ NET_DM_ATTR_ALERT_MODE, /* u8 */
+ NET_DM_ATTR_PC, /* u64 */
+ NET_DM_ATTR_SYMBOL, /* string */
+ NET_DM_ATTR_IN_PORT, /* nested */
+ NET_DM_ATTR_TIMESTAMP, /* struct timespec */
+ NET_DM_ATTR_PAYLOAD, /* binary */
+ NET_DM_ATTR_PAD,
+
+ __NET_DM_ATTR_MAX,
+ NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
+};
+
/**
* enum net_dm_alert_mode - Alert mode.
* @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ * @NET_DM_ALERT_MODE_PACKET: Each dropped packet is sent to user space along
+ * with metadata.
*/
enum net_dm_alert_mode {
NET_DM_ALERT_MODE_SUMMARY,
+ NET_DM_ALERT_MODE_PACKET,
+};
+
+enum {
+ NET_DM_ATTR_PORT_NETDEV_IFINDEX, /* u32 */
+
+ __NET_DM_ATTR_PORT_MAX,
+ NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
};
#endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9cd2f662cb9e..5fa0b34033d0 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -54,6 +54,7 @@ static DEFINE_MUTEX(net_dm_mutex);
struct per_cpu_dm_data {
spinlock_t lock; /* Protects 'skb' and 'send_timer' */
struct sk_buff *skb;
+ struct sk_buff_head drop_queue;
struct work_struct dm_alert_work;
struct timer_list send_timer;
};
@@ -85,6 +86,14 @@ struct net_dm_alert_ops {
void (*work_item_func)(struct work_struct *work);
};
+struct net_dm_skb_cb {
+ void *pc;
+};
+
+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
+
+#define NET_DM_QUEUE_LEN 1000
+
static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
@@ -257,8 +266,209 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
.work_item_func = send_dm_alert,
};
+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+ struct sk_buff *skb,
+ void *location)
+{
+ ktime_t tstamp = ktime_get_real();
+ struct per_cpu_dm_data *data;
+ struct sk_buff *nskb;
+ unsigned long flags;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return;
+
+ NET_DM_SKB_CB(nskb)->pc = location;
+ /* Override the timestamp because we care about the time when the
+ * packet was dropped.
+ */
+ nskb->tstamp = tstamp;
+
+ data = this_cpu_ptr(&dm_cpu_data);
+
+ spin_lock_irqsave(&data->drop_queue.lock, flags);
+ if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+ __skb_queue_tail(&data->drop_queue, nskb);
+ else
+ goto unlock_free;
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+ schedule_work(&data->dm_alert_work);
+
+ return;
+
+unlock_free:
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+ consume_skb(nskb);
+}
+
+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
+ struct napi_struct *napi,
+ int work, int budget)
+{
+}
+
+static size_t net_dm_in_port_size(void)
+{
+ /* NET_DM_ATTR_IN_PORT nest */
+ return nla_total_size(0) +
+ /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
+ nla_total_size(sizeof(u32));
+}
+
+#define NET_DM_MAX_SYMBOL_LEN 40
+
+static size_t net_dm_packet_report_size(size_t payload_len)
+{
+ size_t size;
+
+ size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+ return NLMSG_ALIGN(size) +
+ /* NET_DM_ATTR_PC */
+ nla_total_size(sizeof(u64)) +
+ /* NET_DM_ATTR_SYMBOL */
+ nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
+ /* NET_DM_ATTR_IN_PORT */
+ net_dm_in_port_size() +
+ /* NET_DM_ATTR_TIMESTAMP */
+ nla_total_size(sizeof(struct timespec)) +
+ /* NET_DM_ATTR_PAYLOAD */
+ nla_total_size(payload_len);
+}
+
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+{
+ struct nlattr *attr;
+
+ attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
+ if (!attr)
+ return -EMSGSIZE;
+
+ if (ifindex &&
+ nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
+ goto nla_put_failure;
+
+ nla_nest_end(msg, attr);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, attr);
+ return -EMSGSIZE;
+}
+
+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
+ size_t payload_len)
+{
+ u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+ char buf[NET_DM_MAX_SYMBOL_LEN];
+ struct nlattr *attr;
+ struct timespec ts;
+ void *hdr;
+ int rc;
+
+ hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+ NET_DM_CMD_PACKET_ALERT);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+ goto nla_put_failure;
+
+ snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+ if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
+ goto nla_put_failure;
+
+ rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+ if (rc)
+ goto nla_put_failure;
+
+ if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+ nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+ goto nla_put_failure;
+
+ if (!payload_len)
+ goto out;
+
+ attr = skb_put(msg, nla_total_size(payload_len));
+ attr->nla_type = NET_DM_ATTR_PAYLOAD;
+ attr->nla_len = nla_attr_size(payload_len);
+ if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
+
+static void net_dm_packet_report(struct sk_buff *skb)
+{
+ struct sk_buff *msg;
+ size_t payload_len;
+ int rc;
+
+ /* Make sure we start copying the packet from the MAC header */
+ if (skb->data > skb_mac_header(skb))
+ skb_push(skb, skb->data - skb_mac_header(skb));
+ else
+ skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+ /* Ensure packet fits inside a single netlink attribute */
+ payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+
+ msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+ if (!msg)
+ goto out;
+
+ rc = net_dm_packet_report_fill(msg, skb, payload_len);
+ if (rc) {
+ nlmsg_free(msg);
+ goto out;
+ }
+
+ genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+ consume_skb(skb);
+}
+
+static void net_dm_packet_work(struct work_struct *work)
+{
+ struct per_cpu_dm_data *data;
+ struct sk_buff_head list;
+ struct sk_buff *skb;
+ unsigned long flags;
+
+ data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+ __skb_queue_head_init(&list);
+
+ spin_lock_irqsave(&data->drop_queue.lock, flags);
+ skb_queue_splice_tail_init(&data->drop_queue, &list);
+ spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+ while ((skb = __skb_dequeue(&list)))
+ net_dm_packet_report(skb);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
+ .kfree_skb_probe = net_dm_packet_trace_kfree_skb_hit,
+ .napi_poll_probe = net_dm_packet_trace_napi_poll_hit,
+ .work_item_func = net_dm_packet_work,
+};
+
static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
[NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
+ [NET_DM_ALERT_MODE_PACKET] = &net_dm_alert_packet_ops,
};
static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
@@ -326,9 +536,12 @@ static void net_dm_trace_off_set(void)
*/
for_each_possible_cpu(cpu) {
struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct sk_buff *skb;
del_timer_sync(&data->send_timer);
cancel_work_sync(&data->dm_alert_work);
+ while ((skb = __skb_dequeue(&data->drop_queue)))
+ consume_skb(skb);
}
list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
@@ -370,12 +583,61 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
return rc;
}
+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
+ enum net_dm_alert_mode *p_alert_mode)
+{
+ u8 val;
+
+ val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
+
+ switch (val) {
+ case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
+ case NET_DM_ALERT_MODE_PACKET:
+ *p_alert_mode = val;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int net_dm_alert_mode_set(struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ enum net_dm_alert_mode alert_mode;
+ int rc;
+
+ if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
+ return 0;
+
+ rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
+ return -EINVAL;
+ }
+
+ net_dm_alert_mode = alert_mode;
+
+ return 0;
+}
+
static int net_dm_cmd_config(struct sk_buff *skb,
struct genl_info *info)
{
- NL_SET_ERR_MSG_MOD(info->extack, "Command not supported");
+ struct netlink_ext_ack *extack = info->extack;
+ int rc;
- return -EOPNOTSUPP;
+ if (trace_state == TRACE_ON) {
+ NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+ return -EBUSY;
+ }
+
+ rc = net_dm_alert_mode_set(info);
+ if (rc)
+ return rc;
+
+ return 0;
}
static int net_dm_cmd_trace(struct sk_buff *skb,
@@ -430,6 +692,11 @@ static int dropmon_net_event(struct notifier_block *ev_block,
return NOTIFY_DONE;
}
+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
+ [NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
+ [NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+};
+
static const struct genl_ops dropmon_ops[] = {
{
.cmd = NET_DM_CMD_CONFIG,
@@ -467,6 +734,8 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
.hdrsize = 0,
.name = "NET_DM",
.version = 2,
+ .maxattr = NET_DM_ATTR_MAX,
+ .policy = net_dm_nl_policy,
.pre_doit = net_dm_nl_pre_doit,
.post_doit = net_dm_nl_post_doit,
.module = THIS_MODULE,
@@ -510,6 +779,7 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
spin_lock_init(&data->lock);
+ skb_queue_head_init(&data->drop_queue);
}
goto out;
@@ -539,6 +809,7 @@ static void exit_net_drop_monitor(void)
* to this struct and can free the skb inside it
*/
kfree_skb(data->skb);
+ WARN_ON(!skb_queue_empty(&data->drop_queue));
}
BUG_ON(genl_unregister_family(&net_drop_monitor_family));
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 05/10] drop_monitor: Add alert mode operations
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
The next patch is going to add another alert mode in which the dropped
packet is notified to user space, instead of only a summary of recent
drops.
Abstract the differences between the modes by adding alert mode
operations. The operations are selected based on the currently
configured mode and associated with the probes and the work item just
before tracing starts.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
include/uapi/linux/net_dropmon.h | 9 ++++++++
net/core/drop_monitor.c | 38 +++++++++++++++++++++++++++-----
2 files changed, 41 insertions(+), 6 deletions(-)
diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 5edbd0a675fd..0fecdedeb6ca 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -62,4 +62,13 @@ enum {
* Our group identifiers
*/
#define NET_DM_GRP_ALERT 1
+
+/**
+ * enum net_dm_alert_mode - Alert mode.
+ * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ */
+enum net_dm_alert_mode {
+ NET_DM_ALERT_MODE_SUMMARY,
+};
+
#endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cd2f3069f34e..9cd2f662cb9e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -75,6 +75,16 @@ static int dm_delay = 1;
static unsigned long dm_hw_check_delta = 2*HZ;
static LIST_HEAD(hw_stats_list);
+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+
+struct net_dm_alert_ops {
+ void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+ void *location);
+ void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+ int work, int budget);
+ void (*work_item_func)(struct work_struct *work);
+};
+
static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
{
size_t al;
@@ -241,10 +251,23 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_unlock();
}
+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+ .kfree_skb_probe = trace_kfree_skb_hit,
+ .napi_poll_probe = trace_napi_poll_hit,
+ .work_item_func = send_dm_alert,
+};
+
+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
+ [NET_DM_ALERT_MODE_SUMMARY] = &net_dm_alert_summary_ops,
+};
+
static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
{
+ const struct net_dm_alert_ops *ops;
int cpu, rc;
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
if (!try_module_get(THIS_MODULE)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
return -ENODEV;
@@ -254,7 +277,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
struct sk_buff *skb;
- INIT_WORK(&data->dm_alert_work, send_dm_alert);
+ INIT_WORK(&data->dm_alert_work, ops->work_item_func);
timer_setup(&data->send_timer, sched_send_work, 0);
/* Allocate a new per-CPU skb for the summary alert message and
* free the old one which might contain stale data from
@@ -264,13 +287,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
consume_skb(skb);
}
- rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
goto err_module_put;
}
- rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+ rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
goto err_unregister_trace;
@@ -279,7 +302,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
return 0;
err_unregister_trace:
- unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
err_module_put:
module_put(THIS_MODULE);
return rc;
@@ -288,10 +311,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
static void net_dm_trace_off_set(void)
{
struct dm_hw_stat_delta *new_stat, *temp;
+ const struct net_dm_alert_ops *ops;
int cpu;
- unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
- unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+ unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
+ unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
tracepoint_synchronize_unregister();
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 03/10] drop_monitor: Reset per-CPU data before starting to trace
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
The function reset_per_cpu_data() allocates and prepares a new skb for
the summary netlink alert message ('NET_DM_CMD_ALERT'). The new skb is
stored in the per-CPU 'data' variable and the old one is returned.
The function is invoked during module initialization and from the
workqueue, before an alert is sent. This means that it is possible to
receive an alert with stale data, if we stopped tracing when the
hysteresis timer ('data->send_timer') was pending.
Instead of invoking the function during module initialization, invoke it
just before we start tracing and ensure we get a fresh skb.
This also allows us to remove the calls to initialize the timer and the
work item from the module initialization path, since both could have
been triggered by the error paths of reset_per_cpu_data().
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
net/core/drop_monitor.c | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b266dc1660ed..1cf4988de591 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -252,9 +252,16 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
for_each_possible_cpu(cpu) {
struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+ struct sk_buff *skb;
INIT_WORK(&data->dm_alert_work, send_dm_alert);
timer_setup(&data->send_timer, sched_send_work, 0);
+ /* Allocate a new per-CPU skb for the summary alert message and
+ * free the old one which might contain stale data from
+ * previous tracing.
+ */
+ skb = reset_per_cpu_data(data);
+ consume_skb(skb);
}
rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
@@ -475,10 +482,7 @@ static int __init init_net_drop_monitor(void)
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
- INIT_WORK(&data->dm_alert_work, send_dm_alert);
- timer_setup(&data->send_timer, sched_send_work, 0);
spin_lock_init(&data->lock);
- reset_per_cpu_data(data);
}
goto out;
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 04/10] drop_monitor: Require CAP_NET_ADMIN for drop monitor configuration
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
Currently, the configure command does not do anything but return an
error. Subsequent patches will enable the command to change various
configuration options such as alert mode and packet truncation.
Similar to other netlink-based configuration channels, make sure only
users with the CAP_NET_ADMIN capability set can execute this command.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
net/core/drop_monitor.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 1cf4988de591..cd2f3069f34e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -409,6 +409,7 @@ static const struct genl_ops dropmon_ops[] = {
.cmd = NET_DM_CMD_CONFIG,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = net_dm_cmd_config,
+ .flags = GENL_ADMIN_PERM,
},
{
.cmd = NET_DM_CMD_START,
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 02/10] drop_monitor: Initialize timer and work item upon tracing enable
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
The timer and work item are currently initialized once during module
init, but subsequent patches will need to associate different functions
with the work item, based on the configured alert mode.
Allow subsequent patches to make that change by initializing and
de-initializing these objects during tracing enable and disable.
This also guarantees that once the request to disable tracing returns,
no more netlink notifications will be generated.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
net/core/drop_monitor.c | 24 +++++++++++++++++++-----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 8b9b0b899ebc..b266dc1660ed 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -243,13 +243,20 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
{
- int rc;
+ int cpu, rc;
if (!try_module_get(THIS_MODULE)) {
NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
return -ENODEV;
}
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+
+ INIT_WORK(&data->dm_alert_work, send_dm_alert);
+ timer_setup(&data->send_timer, sched_send_work, 0);
+ }
+
rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
if (rc) {
NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
@@ -274,12 +281,23 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
static void net_dm_trace_off_set(void)
{
struct dm_hw_stat_delta *new_stat, *temp;
+ int cpu;
unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
tracepoint_synchronize_unregister();
+ /* Make sure we do not send notifications to user space after request
+ * to stop tracing returns.
+ */
+ for_each_possible_cpu(cpu) {
+ struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+
+ del_timer_sync(&data->send_timer);
+ cancel_work_sync(&data->dm_alert_work);
+ }
+
list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
if (new_stat->dev == NULL) {
list_del_rcu(&new_stat->list);
@@ -481,14 +499,10 @@ static void exit_net_drop_monitor(void)
/*
* Because of the module_get/put we do in the trace state change path
* we are guarnateed not to have any current users when we get here
- * all we need to do is make sure that we don't have any running timers
- * or pending schedule calls
*/
for_each_possible_cpu(cpu) {
data = &per_cpu(dm_cpu_data, cpu);
- del_timer_sync(&data->send_timer);
- cancel_work_sync(&data->dm_alert_work);
/*
* At this point, we should have exclusive access
* to this struct and can free the skb inside it
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 01/10] drop_monitor: Split tracing enable / disable to different functions
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
In-Reply-To: <20190807103059.15270-1-idosch@idosch.org>
From: Ido Schimmel <idosch@mellanox.com>
Subsequent patches will need to enable / disable tracing based on the
configured alerting mode.
Reduce the nesting level and prepare for the introduction of this
functionality by splitting the tracing enable / disable operations into
two different functions.
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
net/core/drop_monitor.c | 79 ++++++++++++++++++++++++++---------------
1 file changed, 51 insertions(+), 28 deletions(-)
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 4deb86f990f1..8b9b0b899ebc 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -241,11 +241,58 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
rcu_read_unlock();
}
+static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
+{
+ int rc;
+
+ if (!try_module_get(THIS_MODULE)) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+ return -ENODEV;
+ }
+
+ rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
+ goto err_module_put;
+ }
+
+ rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+ if (rc) {
+ NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
+ goto err_unregister_trace;
+ }
+
+ return 0;
+
+err_unregister_trace:
+ unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+err_module_put:
+ module_put(THIS_MODULE);
+ return rc;
+}
+
+static void net_dm_trace_off_set(void)
+{
+ struct dm_hw_stat_delta *new_stat, *temp;
+
+ unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
+ unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+
+ tracepoint_synchronize_unregister();
+
+ list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+ if (new_stat->dev == NULL) {
+ list_del_rcu(&new_stat->list);
+ kfree_rcu(new_stat, rcu);
+ }
+ }
+
+ module_put(THIS_MODULE);
+}
+
static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
{
int rc = 0;
- struct dm_hw_stat_delta *new_stat = NULL;
- struct dm_hw_stat_delta *temp;
if (state == trace_state) {
NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
@@ -254,34 +301,10 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
switch (state) {
case TRACE_ON:
- if (!try_module_get(THIS_MODULE)) {
- NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
- rc = -ENODEV;
- break;
- }
-
- rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
- rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
+ rc = net_dm_trace_on_set(extack);
break;
-
case TRACE_OFF:
- rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
- rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-
- tracepoint_synchronize_unregister();
-
- /*
- * Clean the device list
- */
- list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
- if (new_stat->dev == NULL) {
- list_del_rcu(&new_stat->list);
- kfree_rcu(new_stat, rcu);
- }
- }
-
- module_put(THIS_MODULE);
-
+ net_dm_trace_off_set();
break;
default:
rc = 1;
--
2.21.0
^ permalink raw reply related
* [PATCH net-next 00/10] drop_monitor: Capture dropped packets and metadata
From: Ido Schimmel @ 2019-08-07 10:30 UTC (permalink / raw)
To: netdev
Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
Ido Schimmel
From: Ido Schimmel <idosch@mellanox.com>
So far drop monitor supported only one mode of operation in which a
summary of recent packet drops is periodically sent to user space as a
netlink event. The event only includes the drop location (program
counter) and number of drops in the last interval.
While this mode of operation allows one to understand if the system is
dropping packets, it is not sufficient if a more detailed analysis is
required. Both the packet itself and related metadata are missing.
This patchset extends drop monitor with another mode of operation where
the packet - potentially truncated - and metadata (e.g., drop location,
timestamp, netdev) are sent to user space as a netlink event. Thanks to
the extensible nature of netlink, more metadata can be added in the
future.
To avoid performing expensive operations in the context in which
kfree_skb() is called, the dropped skbs are cloned and queued on a per-CPU
skb drop list. The list is then processed in process context (using a
workqueue), where the netlink messages are allocated, prepared and
finally sent to user space.
A follow-up patchset will integrate drop monitor with devlink and allow
the latter to call into drop monitor to report hardware drops. In the
future, XDP drops can be added as well, thereby making drop monitor the
go-to netlink channel for diagnosing all packet drops.
Example usage with patched dropwatch [1] can be found here [2]. Example
dissection of drop monitor netlink events with patched wireshark [3] can
be found here [4]. I will submit both changes upstream after the kernel
changes are accepted. Another change worth making is adding a dropmon
pseudo interface to libpcap, similar to the nflog interface [5]. This
will allow users to specifically listen on dropmon traffic instead of
capturing all netlink packets via the nlmon netdev.
Patches #1-#5 prepare the code towards the actual changes in later
patches.
Patch #6 adds another mode of operation to drop monitor in which the
dropped packet itself is notified to user space along with metadata.
Patch #7 allows users to truncate reported packets to a specific length,
in case only the headers are of interest. The original length of the
packet is added as metadata to the netlink notification.
Patch #8 allows users to query the current configuration of drop monitor
(e.g., alert mode, truncation length).
Patches #9-#10 allow users to tune the length of the per-CPU skb drop
list according to their needs.
Changes since RFC [6]:
* Limit the length of the per-CPU skb drop list and make it configurable
* Do not use the hysteresis timer in packet alert mode
* Introduce alert mode operations in a separate patch and only then
introduce the new alert mode
* Use 'skb->skb_iif' instead of 'skb->dev' because the latter is inside
a union with 'dev_scratch' and therefore not guaranteed to point to a
valid netdev
* Return '-EBUSY' instead of '-EOPNOTSUPP' when trying to configure drop
monitor while it is monitoring
* Did not change schedule_work() in favor of schedule_work_on() as I did
not observe a change in the number of tail drops
[1] https://github.com/idosch/dropwatch/tree/packet-mode
[2] https://gist.github.com/idosch/166b64384577174230fd2523866f6b1c#file-gistfile1-txt
[3] https://github.com/idosch/wireshark/tree/drop-monitor-v1
[4] https://gist.github.com/idosch/166b64384577174230fd2523866f6b1c#file-gistfile2-txt
[5] https://github.com/the-tcpdump-group/libpcap/blob/master/pcap-netfilter-linux.c
[6] https://patchwork.ozlabs.org/cover/1135226/
Ido Schimmel (10):
drop_monitor: Split tracing enable / disable to different functions
drop_monitor: Initialize timer and work item upon tracing enable
drop_monitor: Reset per-CPU data before starting to trace
drop_monitor: Require CAP_NET_ADMIN for drop monitor configuration
drop_monitor: Add alert mode operations
drop_monitor: Add packet alert mode
drop_monitor: Allow truncation of dropped packets
drop_monitor: Add a command to query current configuration
drop_monitor: Make drop queue length configurable
drop_monitor: Expose tail drop counter
include/uapi/linux/net_dropmon.h | 50 +++
net/core/drop_monitor.c | 594 +++++++++++++++++++++++++++++--
2 files changed, 607 insertions(+), 37 deletions(-)
--
2.21.0
^ permalink raw reply
* Re: [PATCH net-next v2] ibmveth: Allow users to update reported speed and duplex
From: Michael Ellerman @ 2019-08-07 10:09 UTC (permalink / raw)
To: Thomas Falcon; +Cc: netdev, linuxppc-dev, Thomas Falcon
In-Reply-To: <1565108588-17331-1-git-send-email-tlfalcon@linux.ibm.com>
Thomas Falcon <tlfalcon@linux.ibm.com> writes:
> Reported ethtool link settings for the ibmveth driver are currently
> hardcoded and no longer reflect the actual capabilities of supported
> hardware. There is no interface designed for retrieving this information
> from device firmware nor is there any way to update current settings
> to reflect observed or expected link speeds.
>
> To avoid breaking existing configurations, retain current values as
> default settings but let users update them to match the expected
> capabilities of underlying hardware if needed. This update would
> allow the use of configurations that rely on certain link speed
> settings, such as LACP. This patch is based on the implementation
> in virtio_net.
>
> Signed-off-by: Thomas Falcon <tlfalcon@linux.ibm.com>
> ---
> v2: Updated default driver speed/duplex settings to avoid
> breaking existing setups
Thanks.
I won't give you an ack because I don't know jack about network drivers
these days, but I think that alleviates my concern about breaking
existing setups. I'll leave the rest of the review up to the networking
folks.
cheers
> diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
> index d654c23..5dc634f 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.c
> +++ b/drivers/net/ethernet/ibm/ibmveth.c
> @@ -712,31 +712,68 @@ static int ibmveth_close(struct net_device *netdev)
> return 0;
> }
>
> -static int netdev_get_link_ksettings(struct net_device *dev,
> - struct ethtool_link_ksettings *cmd)
> +static bool
> +ibmveth_validate_ethtool_cmd(const struct ethtool_link_ksettings *cmd)
> {
> - u32 supported, advertising;
> -
> - supported = (SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg |
> - SUPPORTED_FIBRE);
> - advertising = (ADVERTISED_1000baseT_Full | ADVERTISED_Autoneg |
> - ADVERTISED_FIBRE);
> - cmd->base.speed = SPEED_1000;
> - cmd->base.duplex = DUPLEX_FULL;
> - cmd->base.port = PORT_FIBRE;
> - cmd->base.phy_address = 0;
> - cmd->base.autoneg = AUTONEG_ENABLE;
> -
> - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
> - supported);
> - ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
> - advertising);
> + struct ethtool_link_ksettings diff1 = *cmd;
> + struct ethtool_link_ksettings diff2 = {};
> +
> + diff2.base.port = PORT_OTHER;
> + diff1.base.speed = 0;
> + diff1.base.duplex = 0;
> + diff1.base.cmd = 0;
> + diff1.base.link_mode_masks_nwords = 0;
> + ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
> +
> + return !memcmp(&diff1.base, &diff2.base, sizeof(diff1.base)) &&
> + bitmap_empty(diff1.link_modes.supported,
> + __ETHTOOL_LINK_MODE_MASK_NBITS) &&
> + bitmap_empty(diff1.link_modes.advertising,
> + __ETHTOOL_LINK_MODE_MASK_NBITS) &&
> + bitmap_empty(diff1.link_modes.lp_advertising,
> + __ETHTOOL_LINK_MODE_MASK_NBITS);
> +}
> +
> +static int ibmveth_set_link_ksettings(struct net_device *dev,
> + const struct ethtool_link_ksettings *cmd)
> +{
> + struct ibmveth_adapter *adapter = netdev_priv(dev);
> + u32 speed;
> + u8 duplex;
> +
> + speed = cmd->base.speed;
> + duplex = cmd->base.duplex;
> + /* don't allow custom speed and duplex */
> + if (!ethtool_validate_speed(speed) ||
> + !ethtool_validate_duplex(duplex) ||
> + !ibmveth_validate_ethtool_cmd(cmd))
> + return -EINVAL;
> + adapter->speed = speed;
> + adapter->duplex = duplex;
>
> return 0;
> }
>
> -static void netdev_get_drvinfo(struct net_device *dev,
> - struct ethtool_drvinfo *info)
> +static int ibmveth_get_link_ksettings(struct net_device *dev,
> + struct ethtool_link_ksettings *cmd)
> +{
> + struct ibmveth_adapter *adapter = netdev_priv(dev);
> +
> + cmd->base.speed = adapter->speed;
> + cmd->base.duplex = adapter->duplex;
> + cmd->base.port = PORT_OTHER;
> +
> + return 0;
> +}
> +
> +static void ibmveth_init_link_settings(struct ibmveth_adapter *adapter)
> +{
> + adapter->duplex = DUPLEX_FULL;
> + adapter->speed = SPEED_1000;
> +}
> +
> +static void ibmveth_get_drvinfo(struct net_device *dev,
> + struct ethtool_drvinfo *info)
> {
> strlcpy(info->driver, ibmveth_driver_name, sizeof(info->driver));
> strlcpy(info->version, ibmveth_driver_version, sizeof(info->version));
> @@ -965,12 +1002,13 @@ static void ibmveth_get_ethtool_stats(struct net_device *dev,
> }
>
> static const struct ethtool_ops netdev_ethtool_ops = {
> - .get_drvinfo = netdev_get_drvinfo,
> + .get_drvinfo = ibmveth_get_drvinfo,
> .get_link = ethtool_op_get_link,
> .get_strings = ibmveth_get_strings,
> .get_sset_count = ibmveth_get_sset_count,
> .get_ethtool_stats = ibmveth_get_ethtool_stats,
> - .get_link_ksettings = netdev_get_link_ksettings,
> + .get_link_ksettings = ibmveth_get_link_ksettings,
> + .set_link_ksettings = ibmveth_set_link_ksettings
> };
>
> static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
> @@ -1647,6 +1685,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
> adapter->netdev = netdev;
> adapter->mcastFilterSize = *mcastFilterSize_p;
> adapter->pool_config = 0;
> + ibmveth_init_link_settings(adapter);
>
> netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
>
> diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
> index 4e9bf34..db96c88 100644
> --- a/drivers/net/ethernet/ibm/ibmveth.h
> +++ b/drivers/net/ethernet/ibm/ibmveth.h
> @@ -162,6 +162,9 @@ struct ibmveth_adapter {
> u64 tx_send_failed;
> u64 tx_large_packets;
> u64 rx_large_packets;
> + /* Ethtool settings */
> + u8 duplex;
> + u32 speed;
> };
>
> /*
> --
> 1.8.3.1
^ permalink raw reply
* [PATCH net-next v5 6/6] net: mscc: PTP Hardware Clock (PHC) support
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
This patch adds support for PTP Hardware Clock (PHC) to the Ocelot
switch for both PTP 1-step and 2-step modes.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
drivers/net/ethernet/mscc/ocelot.c | 396 ++++++++++++++++++++++-
drivers/net/ethernet/mscc/ocelot.h | 39 +++
drivers/net/ethernet/mscc/ocelot_board.c | 113 ++++++-
3 files changed, 535 insertions(+), 13 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 6932e615d4b0..0b74e4231f46 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -14,6 +14,7 @@
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/phy.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/skbuff.h>
#include <linux/iopoll.h>
#include <net/arp.h>
@@ -538,7 +539,7 @@ static int ocelot_port_stop(struct net_device *dev)
*/
static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
{
- ifh[0] = IFH_INJ_BYPASS;
+ ifh[0] = IFH_INJ_BYPASS | ((0x1ff & info->rew_op) << 21);
ifh[1] = (0xf00 & info->port) >> 8;
ifh[2] = (0xff & info->port) << 24;
ifh[3] = (info->tag_type << 16) | info->vid;
@@ -548,6 +549,7 @@ static int ocelot_gen_ifh(u32 *ifh, struct frame_info *info)
static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
{
+ struct skb_shared_info *shinfo = skb_shinfo(skb);
struct ocelot_port *port = netdev_priv(dev);
struct ocelot *ocelot = port->ocelot;
u32 val, ifh[IFH_LEN];
@@ -566,6 +568,14 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
info.port = BIT(port->chip_port);
info.tag_type = IFH_TAG_TYPE_C;
info.vid = skb_vlan_tag_get(skb);
+
+ /* Check if timestamping is needed */
+ if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP) {
+ info.rew_op = port->ptp_cmd;
+ if (port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP)
+ info.rew_op |= (port->ts_id % 4) << 3;
+ }
+
ocelot_gen_ifh(ifh, &info);
for (i = 0; i < IFH_LEN; i++)
@@ -596,11 +606,53 @@ static int ocelot_port_xmit(struct sk_buff *skb, struct net_device *dev)
dev->stats.tx_packets++;
dev->stats.tx_bytes += skb->len;
- dev_kfree_skb_any(skb);
+
+ if (ocelot->ptp && shinfo->tx_flags & SKBTX_HW_TSTAMP &&
+ port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+ struct ocelot_skb *oskb =
+ kzalloc(sizeof(struct ocelot_skb), GFP_ATOMIC);
+
+ skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+ oskb->skb = skb;
+ oskb->id = port->ts_id % 4;
+ port->ts_id++;
+
+ list_add_tail(&oskb->head, &port->skbs);
+ } else {
+ dev_kfree_skb_any(skb);
+ }
return NETDEV_TX_OK;
}
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts)
+{
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+ /* Read current PTP time to get seconds */
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+ ts->tv_sec = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+
+ /* Read packet HW timestamp from FIFO */
+ val = ocelot_read(ocelot, SYS_PTP_TXSTAMP);
+ ts->tv_nsec = SYS_PTP_TXSTAMP_PTP_TXSTAMP(val);
+
+ /* Sec has incremented since the ts was registered */
+ if ((ts->tv_sec & 0x1) != !!(val & SYS_PTP_TXSTAMP_PTP_TXSTAMP_SEC))
+ ts->tv_sec--;
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+}
+EXPORT_SYMBOL(ocelot_get_hwtimestamp);
+
static int ocelot_mc_unsync(struct net_device *dev, const unsigned char *addr)
{
struct ocelot_port *port = netdev_priv(dev);
@@ -917,6 +969,97 @@ static int ocelot_get_port_parent_id(struct net_device *dev,
return 0;
}
+static int ocelot_hwstamp_get(struct ocelot_port *port, struct ifreq *ifr)
+{
+ struct ocelot *ocelot = port->ocelot;
+
+ return copy_to_user(ifr->ifr_data, &ocelot->hwtstamp_config,
+ sizeof(ocelot->hwtstamp_config)) ? -EFAULT : 0;
+}
+
+static int ocelot_hwstamp_set(struct ocelot_port *port, struct ifreq *ifr)
+{
+ struct ocelot *ocelot = port->ocelot;
+ struct hwtstamp_config cfg;
+
+ if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+ return -EFAULT;
+
+ /* reserved for future extensions */
+ if (cfg.flags)
+ return -EINVAL;
+
+ /* Tx type sanity check */
+ switch (cfg.tx_type) {
+ case HWTSTAMP_TX_ON:
+ port->ptp_cmd = IFH_REW_OP_TWO_STEP_PTP;
+ break;
+ case HWTSTAMP_TX_ONESTEP_SYNC:
+ /* IFH_REW_OP_ONE_STEP_PTP updates the correctional field, we
+ * need to update the origin time.
+ */
+ port->ptp_cmd = IFH_REW_OP_ORIGIN_PTP;
+ break;
+ case HWTSTAMP_TX_OFF:
+ port->ptp_cmd = 0;
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ mutex_lock(&ocelot->ptp_lock);
+
+ switch (cfg.rx_filter) {
+ case HWTSTAMP_FILTER_NONE:
+ break;
+ case HWTSTAMP_FILTER_ALL:
+ case HWTSTAMP_FILTER_SOME:
+ case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_NTP_ALL:
+ case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+ case HWTSTAMP_FILTER_PTP_V2_EVENT:
+ case HWTSTAMP_FILTER_PTP_V2_SYNC:
+ case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+ cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+ break;
+ default:
+ mutex_unlock(&ocelot->ptp_lock);
+ return -ERANGE;
+ }
+
+ /* Commit back the result & save it */
+ memcpy(&ocelot->hwtstamp_config, &cfg, sizeof(cfg));
+ mutex_unlock(&ocelot->ptp_lock);
+
+ return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+ struct ocelot_port *port = netdev_priv(dev);
+ struct ocelot *ocelot = port->ocelot;
+
+ /* The function is only used for PTP operations for now */
+ if (!ocelot->ptp)
+ return -EOPNOTSUPP;
+
+ switch (cmd) {
+ case SIOCSHWTSTAMP:
+ return ocelot_hwstamp_set(port, ifr);
+ case SIOCGHWTSTAMP:
+ return ocelot_hwstamp_get(port, ifr);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
static const struct net_device_ops ocelot_port_netdev_ops = {
.ndo_open = ocelot_port_open,
.ndo_stop = ocelot_port_stop,
@@ -933,6 +1076,7 @@ static const struct net_device_ops ocelot_port_netdev_ops = {
.ndo_set_features = ocelot_set_features,
.ndo_get_port_parent_id = ocelot_get_port_parent_id,
.ndo_setup_tc = ocelot_setup_tc,
+ .ndo_do_ioctl = ocelot_ioctl,
};
static void ocelot_get_strings(struct net_device *netdev, u32 sset, u8 *data)
@@ -1014,12 +1158,37 @@ static int ocelot_get_sset_count(struct net_device *dev, int sset)
return ocelot->num_stats;
}
+static int ocelot_get_ts_info(struct net_device *dev,
+ struct ethtool_ts_info *info)
+{
+ struct ocelot_port *ocelot_port = netdev_priv(dev);
+ struct ocelot *ocelot = ocelot_port->ocelot;
+
+ if (!ocelot->ptp)
+ return ethtool_op_get_ts_info(dev, info);
+
+ info->phc_index = ocelot->ptp_clock ?
+ ptp_clock_index(ocelot->ptp_clock) : -1;
+ info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_RX_SOFTWARE |
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_TX_HARDWARE |
+ SOF_TIMESTAMPING_RX_HARDWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE;
+ info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON) |
+ BIT(HWTSTAMP_TX_ONESTEP_SYNC);
+ info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+
+ return 0;
+}
+
static const struct ethtool_ops ocelot_ethtool_ops = {
.get_strings = ocelot_get_strings,
.get_ethtool_stats = ocelot_get_ethtool_stats,
.get_sset_count = ocelot_get_sset_count,
.get_link_ksettings = phy_ethtool_get_link_ksettings,
.set_link_ksettings = phy_ethtool_set_link_ksettings,
+ .get_ts_info = ocelot_get_ts_info,
};
static int ocelot_port_attr_stp_state_set(struct ocelot_port *ocelot_port,
@@ -1629,6 +1798,196 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
};
EXPORT_SYMBOL(ocelot_switchdev_blocking_nb);
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+ struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+ unsigned long flags;
+ time64_t s;
+ u32 val;
+ s64 ns;
+
+ spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ s = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN) & 0xffff;
+ s <<= 32;
+ s += ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+ ns = ocelot_read_rix(ocelot, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+ /* Deal with negative values */
+ if (ns >= 0x3ffffff0 && ns <= 0x3fffffff) {
+ s--;
+ ns &= 0xf;
+ ns += 999999984;
+ }
+
+ set_normalized_timespec64(ts, s, ns);
+ return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_gettime64);
+
+static int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
+ const struct timespec64 *ts)
+{
+ struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ ocelot_write_rix(ocelot, lower_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_LSB,
+ TOD_ACC_PIN);
+ ocelot_write_rix(ocelot, upper_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_MSB,
+ TOD_ACC_PIN);
+ ocelot_write_rix(ocelot, ts->tv_nsec, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_LOAD);
+
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+ return 0;
+}
+
+static int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+ if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+ struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+ unsigned long flags;
+ u32 val;
+
+ spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+ ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN);
+ ocelot_write_rix(ocelot, delta, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+ val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+ val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+ val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_DELTA);
+
+ ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+ } else {
+ /* Fall back using ocelot_ptp_settime64 which is not exact. */
+ struct timespec64 ts;
+ u64 now;
+
+ ocelot_ptp_gettime64(ptp, &ts);
+
+ now = ktime_to_ns(timespec64_to_ktime(ts));
+ ts = ns_to_timespec64(now + delta);
+
+ ocelot_ptp_settime64(ptp, &ts);
+ }
+ return 0;
+}
+
+static int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+ struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+ u32 unit = 0, direction = 0;
+ unsigned long flags;
+ u64 adj = 0;
+
+ spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+ if (!scaled_ppm)
+ goto disable_adj;
+
+ if (scaled_ppm < 0) {
+ direction = PTP_CFG_CLK_ADJ_CFG_DIR;
+ scaled_ppm = -scaled_ppm;
+ }
+
+ adj = PSEC_PER_SEC << 16;
+ do_div(adj, scaled_ppm);
+ do_div(adj, 1000);
+
+ /* If the adjustment value is too large, use ns instead */
+ if (adj >= (1L << 30)) {
+ unit = PTP_CFG_CLK_ADJ_FREQ_NS;
+ do_div(adj, 1000);
+ }
+
+ /* Still too big */
+ if (adj >= (1L << 30))
+ goto disable_adj;
+
+ ocelot_write(ocelot, unit | adj, PTP_CLK_CFG_ADJ_FREQ);
+ ocelot_write(ocelot, PTP_CFG_CLK_ADJ_CFG_ENA | direction,
+ PTP_CLK_CFG_ADJ_CFG);
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+ return 0;
+
+disable_adj:
+ ocelot_write(ocelot, 0, PTP_CLK_CFG_ADJ_CFG);
+
+ spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+ return 0;
+}
+
+static struct ptp_clock_info ocelot_ptp_clock_info = {
+ .owner = THIS_MODULE,
+ .name = "ocelot ptp",
+ .max_adj = 0x7fffffff,
+ .n_alarm = 0,
+ .n_ext_ts = 0,
+ .n_per_out = 0,
+ .n_pins = 0,
+ .pps = 0,
+ .gettime64 = ocelot_ptp_gettime64,
+ .settime64 = ocelot_ptp_settime64,
+ .adjtime = ocelot_ptp_adjtime,
+ .adjfine = ocelot_ptp_adjfine,
+};
+
+static int ocelot_init_timestamp(struct ocelot *ocelot)
+{
+ ocelot->ptp_info = ocelot_ptp_clock_info;
+ ocelot->ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
+ if (IS_ERR(ocelot->ptp_clock))
+ return PTR_ERR(ocelot->ptp_clock);
+ /* Check if PHC support is missing at the configuration level */
+ if (!ocelot->ptp_clock)
+ return 0;
+
+ ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
+ ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
+ ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
+
+ ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC);
+
+ /* There is no device reconfiguration, PTP Rx stamping is always
+ * enabled.
+ */
+ ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+ return 0;
+}
+
int ocelot_probe_port(struct ocelot *ocelot, u8 port,
void __iomem *regs,
struct phy_device *phy)
@@ -1661,6 +2020,8 @@ int ocelot_probe_port(struct ocelot *ocelot, u8 port,
ocelot_mact_learn(ocelot, PGID_CPU, dev->dev_addr, ocelot_port->pvid,
ENTRYTYPE_LOCKED);
+ INIT_LIST_HEAD(&ocelot_port->skbs);
+
err = register_netdev(dev);
if (err) {
dev_err(ocelot->dev, "register_netdev failed\n");
@@ -1684,7 +2045,7 @@ EXPORT_SYMBOL(ocelot_probe_port);
int ocelot_init(struct ocelot *ocelot)
{
u32 port;
- int i, cpu = ocelot->num_phys_ports;
+ int i, ret, cpu = ocelot->num_phys_ports;
char queue_name[32];
ocelot->lags = devm_kcalloc(ocelot->dev, ocelot->num_phys_ports,
@@ -1699,6 +2060,8 @@ int ocelot_init(struct ocelot *ocelot)
return -ENOMEM;
mutex_init(&ocelot->stats_lock);
+ mutex_init(&ocelot->ptp_lock);
+ spin_lock_init(&ocelot->ptp_clock_lock);
snprintf(queue_name, sizeof(queue_name), "%s-stats",
dev_name(ocelot->dev));
ocelot->stats_queue = create_singlethread_workqueue(queue_name);
@@ -1812,16 +2175,43 @@ int ocelot_init(struct ocelot *ocelot)
INIT_DELAYED_WORK(&ocelot->stats_work, ocelot_check_stats_work);
queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
OCELOT_STATS_CHECK_DELAY);
+
+ if (ocelot->ptp) {
+ ret = ocelot_init_timestamp(ocelot);
+ if (ret) {
+ dev_err(ocelot->dev,
+ "Timestamp initialization failed\n");
+ return ret;
+ }
+ }
+
return 0;
}
EXPORT_SYMBOL(ocelot_init);
void ocelot_deinit(struct ocelot *ocelot)
{
+ struct list_head *pos, *tmp;
+ struct ocelot_port *port;
+ struct ocelot_skb *entry;
+ int i;
+
cancel_delayed_work(&ocelot->stats_work);
destroy_workqueue(ocelot->stats_queue);
mutex_destroy(&ocelot->stats_lock);
ocelot_ace_deinit();
+
+ for (i = 0; i < ocelot->num_phys_ports; i++) {
+ port = ocelot->ports[i];
+
+ list_for_each_safe(pos, tmp, &port->skbs) {
+ entry = list_entry(pos, struct ocelot_skb, head);
+
+ list_del(pos);
+ dev_kfree_skb_any(entry->skb);
+ kfree(entry);
+ }
+ }
}
EXPORT_SYMBOL(ocelot_deinit);
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 515dee6fa8a6..e40773c01a44 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -11,9 +11,11 @@
#include <linux/bitops.h>
#include <linux/etherdevice.h>
#include <linux/if_vlan.h>
+#include <linux/net_tstamp.h>
#include <linux/phy.h>
#include <linux/phy/phy.h>
#include <linux/platform_device.h>
+#include <linux/ptp_clock_kernel.h>
#include <linux/regmap.h>
#include "ocelot_ana.h"
@@ -39,6 +41,8 @@
#define OCELOT_STATS_CHECK_DELAY (2 * HZ)
+#define OCELOT_PTP_QUEUE_SZ 128
+
#define IFH_LEN 4
struct frame_info {
@@ -46,6 +50,8 @@ struct frame_info {
u16 port;
u16 vid;
u8 tag_type;
+ u16 rew_op;
+ u32 timestamp; /* rew_val */
};
#define IFH_INJ_BYPASS BIT(31)
@@ -54,6 +60,12 @@ struct frame_info {
#define IFH_TAG_TYPE_C 0
#define IFH_TAG_TYPE_S 1
+#define IFH_REW_OP_NOOP 0x0
+#define IFH_REW_OP_DSCP 0x1
+#define IFH_REW_OP_ONE_STEP_PTP 0x2
+#define IFH_REW_OP_TWO_STEP_PTP 0x3
+#define IFH_REW_OP_ORIGIN_PTP 0x5
+
#define OCELOT_SPEED_2500 0
#define OCELOT_SPEED_1000 1
#define OCELOT_SPEED_100 2
@@ -401,6 +413,13 @@ enum ocelot_regfield {
REGFIELD_MAX
};
+enum ocelot_clk_pins {
+ ALT_PPS_PIN = 1,
+ EXT_CLK_PIN,
+ ALT_LDST_PIN,
+ TOD_ACC_PIN
+};
+
struct ocelot_multicast {
struct list_head list;
unsigned char addr[ETH_ALEN];
@@ -450,6 +469,13 @@ struct ocelot {
u64 *stats;
struct delayed_work stats_work;
struct workqueue_struct *stats_queue;
+
+ u8 ptp:1;
+ struct ptp_clock *ptp_clock;
+ struct ptp_clock_info ptp_info;
+ struct hwtstamp_config hwtstamp_config;
+ struct mutex ptp_lock; /* Protects the PTP interface state */
+ spinlock_t ptp_clock_lock; /* Protects the PTP clock */
};
struct ocelot_port {
@@ -473,6 +499,16 @@ struct ocelot_port {
struct phy *serdes;
struct ocelot_port_tc tc;
+
+ u8 ptp_cmd;
+ struct list_head skbs;
+ u8 ts_id;
+};
+
+struct ocelot_skb {
+ struct list_head head;
+ struct sk_buff *skb;
+ u8 id;
};
u32 __ocelot_read_ix(struct ocelot *ocelot, u32 reg, u32 offset);
@@ -517,4 +553,7 @@ extern struct notifier_block ocelot_netdevice_nb;
extern struct notifier_block ocelot_switchdev_nb;
extern struct notifier_block ocelot_switchdev_blocking_nb;
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
+void ocelot_get_hwtimestamp(struct ocelot *ocelot, struct timespec64 *ts);
+
#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index df8d15994a89..446a4dae6078 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -31,6 +31,8 @@ static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
+ info->timestamp = IFH_EXTRACT_BITFIELD64(ifh[0], 21, 32);
+
info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16, 1);
@@ -92,13 +94,14 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
return IRQ_NONE;
do {
- struct sk_buff *skb;
+ struct skb_shared_hwtstamps *shhwtstamps;
+ u64 tod_in_ns, full_ts_in_ns;
+ struct frame_info info = {};
struct net_device *dev;
- u32 *buf;
+ u32 ifh[4], val, *buf;
+ struct timespec64 ts;
int sz, len, buf_len;
- u32 ifh[4];
- u32 val;
- struct frame_info info;
+ struct sk_buff *skb;
for (i = 0; i < IFH_LEN; i++) {
err = ocelot_rx_frame_word(ocelot, grp, true, &ifh[i]);
@@ -145,6 +148,22 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
break;
}
+ if (ocelot->ptp) {
+ ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+
+ tod_in_ns = ktime_set(ts.tv_sec, ts.tv_nsec);
+ if ((tod_in_ns & 0xffffffff) < info.timestamp)
+ full_ts_in_ns = (((tod_in_ns >> 32) - 1) << 32) |
+ info.timestamp;
+ else
+ full_ts_in_ns = (tod_in_ns & GENMASK_ULL(63, 32)) |
+ info.timestamp;
+
+ shhwtstamps = skb_hwtstamps(skb);
+ memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
+ shhwtstamps->hwtstamp = full_ts_in_ns;
+ }
+
/* Everything we see on an interface that is in the HW bridge
* has already been forwarded.
*/
@@ -164,6 +183,66 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
return IRQ_HANDLED;
}
+static irqreturn_t ocelot_ptp_rdy_irq_handler(int irq, void *arg)
+{
+ int budget = OCELOT_PTP_QUEUE_SZ;
+ struct ocelot *ocelot = arg;
+
+ while (budget--) {
+ struct skb_shared_hwtstamps shhwtstamps;
+ struct list_head *pos, *tmp;
+ struct sk_buff *skb = NULL;
+ struct ocelot_skb *entry;
+ struct ocelot_port *port;
+ struct timespec64 ts;
+ u32 val, id, txport;
+
+ val = ocelot_read(ocelot, SYS_PTP_STATUS);
+
+ /* Check if a timestamp can be retrieved */
+ if (!(val & SYS_PTP_STATUS_PTP_MESS_VLD))
+ break;
+
+ WARN_ON(val & SYS_PTP_STATUS_PTP_OVFL);
+
+ /* Retrieve the ts ID and Tx port */
+ id = SYS_PTP_STATUS_PTP_MESS_ID_X(val);
+ txport = SYS_PTP_STATUS_PTP_MESS_TXPORT_X(val);
+
+ /* Retrieve its associated skb */
+ port = ocelot->ports[txport];
+
+ list_for_each_safe(pos, tmp, &port->skbs) {
+ entry = list_entry(pos, struct ocelot_skb, head);
+ if (entry->id != id)
+ continue;
+
+ skb = entry->skb;
+
+ list_del(pos);
+ kfree(entry);
+ }
+
+ /* Next ts */
+ ocelot_write(ocelot, SYS_PTP_NXT_PTP_NXT, SYS_PTP_NXT);
+
+ if (unlikely(!skb))
+ continue;
+
+ /* Get the h/w timestamp */
+ ocelot_get_hwtimestamp(ocelot, &ts);
+
+ /* Set the timestamp into the skb */
+ memset(&shhwtstamps, 0, sizeof(shhwtstamps));
+ shhwtstamps.hwtstamp = ktime_set(ts.tv_sec, ts.tv_nsec);
+ skb_tstamp_tx(skb, &shhwtstamps);
+
+ dev_kfree_skb_any(skb);
+ }
+
+ return IRQ_HANDLED;
+}
+
static const struct of_device_id mscc_ocelot_match[] = {
{ .compatible = "mscc,vsc7514-switch" },
{ }
@@ -172,12 +251,12 @@ MODULE_DEVICE_TABLE(of, mscc_ocelot_match);
static int mscc_ocelot_probe(struct platform_device *pdev)
{
- int err, irq;
- unsigned int i;
struct device_node *np = pdev->dev.of_node;
struct device_node *ports, *portnp;
+ int err, irq_xtr, irq_ptp_rdy;
struct ocelot *ocelot;
struct regmap *hsio;
+ unsigned int i;
u32 val;
struct {
@@ -232,16 +311,30 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
if (err)
return err;
- irq = platform_get_irq_byname(pdev, "xtr");
- if (irq < 0)
+ irq_xtr = platform_get_irq_byname(pdev, "xtr");
+ if (irq_xtr < 0)
return -ENODEV;
- err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+ err = devm_request_threaded_irq(&pdev->dev, irq_xtr, NULL,
ocelot_xtr_irq_handler, IRQF_ONESHOT,
"frame extraction", ocelot);
if (err)
return err;
+ irq_ptp_rdy = platform_get_irq_byname(pdev, "ptp_rdy");
+ if (irq_ptp_rdy > 0) {
+ err = devm_request_threaded_irq(&pdev->dev, irq_ptp_rdy, NULL,
+ ocelot_ptp_rdy_irq_handler,
+ IRQF_ONESHOT, "ptp ready",
+ ocelot);
+ if (err)
+ return err;
+
+ /* Check if we can support PTP */
+ if (ocelot->targets[PTP])
+ ocelot->ptp = 1;
+ }
+
regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_INIT], 1);
regmap_field_write(ocelot->regfields[SYS_RESET_CFG_MEM_ENA], 1);
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 5/6] net: mscc: remove the frame_info cpuq member
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
In struct frame_info, the cpuq member is never used. This cosmetic patch
removes it from the structure, and from the parsing of the frame header
as it's only set but never used.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
drivers/net/ethernet/mscc/ocelot.h | 1 -
drivers/net/ethernet/mscc/ocelot_board.c | 1 -
2 files changed, 2 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index e0da8b4eddf2..515dee6fa8a6 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -45,7 +45,6 @@ struct frame_info {
u32 len;
u16 port;
u16 vid;
- u8 cpuq;
u8 tag_type;
};
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 5e4f1718dd99..df8d15994a89 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -33,7 +33,6 @@ static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
- info->cpuq = IFH_EXTRACT_BITFIELD64(ifh[1], 20, 8);
info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16, 1);
info->vid = IFH_EXTRACT_BITFIELD64(ifh[1], 0, 12);
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 1/6] Documentation/bindings: net: ocelot: document the PTP bank
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
One additional register range needs to be described within the Ocelot
device tree node: the PTP. This patch documents the binding needed to do
so.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
Documentation/devicetree/bindings/net/mscc-ocelot.txt | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/mscc-ocelot.txt b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
index 9e5c17d426ce..4d05a3b0f786 100644
--- a/Documentation/devicetree/bindings/net/mscc-ocelot.txt
+++ b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
@@ -12,6 +12,7 @@ Required properties:
- "sys"
- "rew"
- "qs"
+ - "ptp" (optional due to backward compatibility)
- "qsys"
- "ana"
- "portX" with X from 0 to the number of last port index available on that
@@ -44,6 +45,7 @@ Example:
reg = <0x1010000 0x10000>,
<0x1030000 0x10000>,
<0x1080000 0x100>,
+ <0x10e0000 0x10000>,
<0x11e0000 0x100>,
<0x11f0000 0x100>,
<0x1200000 0x100>,
@@ -57,9 +59,10 @@ Example:
<0x1280000 0x100>,
<0x1800000 0x80000>,
<0x1880000 0x10000>;
- reg-names = "sys", "rew", "qs", "port0", "port1", "port2",
- "port3", "port4", "port5", "port6", "port7",
- "port8", "port9", "port10", "qsys", "ana";
+ reg-names = "sys", "rew", "qs", "ptp", "port0", "port1",
+ "port2", "port3", "port4", "port5", "port6",
+ "port7", "port8", "port9", "port10", "qsys",
+ "ana";
interrupts = <21 22>;
interrupt-names = "xtr", "inj";
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 4/6] net: mscc: improve the frame header parsing readability
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
This cosmetic patch improves the frame header parsing readability by
introducing a new macro to access and mask its fields.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
drivers/net/ethernet/mscc/ocelot_board.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 990027f04d1b..5e4f1718dd99 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -16,24 +16,26 @@
#include "ocelot.h"
-static int ocelot_parse_ifh(u32 *ifh, struct frame_info *info)
+#define IFH_EXTRACT_BITFIELD64(x, o, w) (((x) >> (o)) & GENMASK_ULL((w) - 1, 0))
+
+static int ocelot_parse_ifh(u32 *_ifh, struct frame_info *info)
{
- int i;
u8 llen, wlen;
+ u64 ifh[2];
+
+ ifh[0] = be64_to_cpu(((__force __be64 *)_ifh)[0]);
+ ifh[1] = be64_to_cpu(((__force __be64 *)_ifh)[1]);
- /* The IFH is in network order, switch to CPU order */
- for (i = 0; i < IFH_LEN; i++)
- ifh[i] = ntohl((__force __be32)ifh[i]);
+ wlen = IFH_EXTRACT_BITFIELD64(ifh[0], 7, 8);
+ llen = IFH_EXTRACT_BITFIELD64(ifh[0], 15, 6);
- wlen = (ifh[1] >> 7) & 0xff;
- llen = (ifh[1] >> 15) & 0x3f;
info->len = OCELOT_BUFFER_CELL_SZ * wlen + llen - 80;
- info->port = (ifh[2] & GENMASK(14, 11)) >> 11;
+ info->port = IFH_EXTRACT_BITFIELD64(ifh[1], 43, 4);
- info->cpuq = (ifh[3] & GENMASK(27, 20)) >> 20;
- info->tag_type = (ifh[3] & BIT(16)) >> 16;
- info->vid = ifh[3] & GENMASK(11, 0);
+ info->cpuq = IFH_EXTRACT_BITFIELD64(ifh[1], 20, 8);
+ info->tag_type = IFH_EXTRACT_BITFIELD64(ifh[1], 16, 1);
+ info->vid = IFH_EXTRACT_BITFIELD64(ifh[1], 0, 12);
return 0;
}
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 3/6] net: mscc: describe the PTP register range
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
This patch adds support for using the PTP register range, and adds a
description of its registers. This bank is used when configuring PTP.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
drivers/net/ethernet/mscc/ocelot.h | 9 ++++++
drivers/net/ethernet/mscc/ocelot_board.c | 10 +++++-
drivers/net/ethernet/mscc/ocelot_ptp.h | 41 ++++++++++++++++++++++++
drivers/net/ethernet/mscc/ocelot_regs.c | 11 +++++++
4 files changed, 70 insertions(+), 1 deletion(-)
create mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.h
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index f7eeb4806897..e0da8b4eddf2 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -23,6 +23,7 @@
#include "ocelot_sys.h"
#include "ocelot_qs.h"
#include "ocelot_tc.h"
+#include "ocelot_ptp.h"
#define PGID_AGGR 64
#define PGID_SRC 80
@@ -71,6 +72,7 @@ enum ocelot_target {
SYS,
S2,
HSIO,
+ PTP,
TARGET_MAX,
};
@@ -343,6 +345,13 @@ enum ocelot_reg {
S2_CACHE_ACTION_DAT,
S2_CACHE_CNT_DAT,
S2_CACHE_TG_DAT,
+ PTP_PIN_CFG = PTP << TARGET_OFFSET,
+ PTP_PIN_TOD_SEC_MSB,
+ PTP_PIN_TOD_SEC_LSB,
+ PTP_PIN_TOD_NSEC,
+ PTP_CFG_MISC,
+ PTP_CLK_CFG_ADJ_CFG,
+ PTP_CLK_CFG_ADJ_FREQ,
};
enum ocelot_regfield {
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 2451d4a96490..990027f04d1b 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -182,6 +182,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
struct {
enum ocelot_target id;
char *name;
+ u8 optional:1;
} res[] = {
{ SYS, "sys" },
{ REW, "rew" },
@@ -189,6 +190,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
{ ANA, "ana" },
{ QS, "qs" },
{ S2, "s2" },
+ { PTP, "ptp", 1 },
};
if (!np && !pdev->dev.platform_data)
@@ -205,8 +207,14 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
struct regmap *target;
target = ocelot_io_platform_init(ocelot, pdev, res[i].name);
- if (IS_ERR(target))
+ if (IS_ERR(target)) {
+ if (res[i].optional) {
+ ocelot->targets[res[i].id] = NULL;
+ continue;
+ }
+
return PTR_ERR(target);
+ }
ocelot->targets[res[i].id] = target;
}
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.h b/drivers/net/ethernet/mscc/ocelot_ptp.h
new file mode 100644
index 000000000000..9ede14a12573
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * License: Dual MIT/GPL
+ * Copyright (c) 2017 Microsemi Corporation
+ */
+
+#ifndef _MSCC_OCELOT_PTP_H_
+#define _MSCC_OCELOT_PTP_H_
+
+#define PTP_PIN_CFG_RSZ 0x20
+#define PTP_PIN_TOD_SEC_MSB_RSZ PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_SEC_LSB_RSZ PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_NSEC_RSZ PTP_PIN_CFG_RSZ
+
+#define PTP_PIN_CFG_DOM BIT(0)
+#define PTP_PIN_CFG_SYNC BIT(2)
+#define PTP_PIN_CFG_ACTION(x) ((x) << 3)
+#define PTP_PIN_CFG_ACTION_MASK PTP_PIN_CFG_ACTION(0x7)
+
+enum {
+ PTP_PIN_ACTION_IDLE = 0,
+ PTP_PIN_ACTION_LOAD,
+ PTP_PIN_ACTION_SAVE,
+ PTP_PIN_ACTION_CLOCK,
+ PTP_PIN_ACTION_DELTA,
+ PTP_PIN_ACTION_NOSYNC,
+ PTP_PIN_ACTION_SYNC,
+};
+
+#define PTP_CFG_MISC_PTP_EN BIT(2)
+
+#define PSEC_PER_SEC 1000000000000LL
+
+#define PTP_CFG_CLK_ADJ_CFG_ENA BIT(0)
+#define PTP_CFG_CLK_ADJ_CFG_DIR BIT(1)
+
+#define PTP_CFG_CLK_ADJ_FREQ_NS BIT(30)
+
+#endif
diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c
index 6c387f994ec5..e59977d20400 100644
--- a/drivers/net/ethernet/mscc/ocelot_regs.c
+++ b/drivers/net/ethernet/mscc/ocelot_regs.c
@@ -234,6 +234,16 @@ static const u32 ocelot_s2_regmap[] = {
REG(S2_CACHE_TG_DAT, 0x000388),
};
+static const u32 ocelot_ptp_regmap[] = {
+ REG(PTP_PIN_CFG, 0x000000),
+ REG(PTP_PIN_TOD_SEC_MSB, 0x000004),
+ REG(PTP_PIN_TOD_SEC_LSB, 0x000008),
+ REG(PTP_PIN_TOD_NSEC, 0x00000c),
+ REG(PTP_CFG_MISC, 0x0000a0),
+ REG(PTP_CLK_CFG_ADJ_CFG, 0x0000a4),
+ REG(PTP_CLK_CFG_ADJ_FREQ, 0x0000a8),
+};
+
static const u32 *ocelot_regmap[] = {
[ANA] = ocelot_ana_regmap,
[QS] = ocelot_qs_regmap,
@@ -241,6 +251,7 @@ static const u32 *ocelot_regmap[] = {
[REW] = ocelot_rew_regmap,
[SYS] = ocelot_sys_regmap,
[S2] = ocelot_s2_regmap,
+ [PTP] = ocelot_ptp_regmap,
};
static const struct reg_field ocelot_regfields[] = {
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 2/6] Documentation/bindings: net: ocelot: document the PTP ready IRQ
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
In-Reply-To: <20190807092214.19936-1-antoine.tenart@bootlin.com>
One additional interrupt needs to be described within the Ocelot device
tree node: the PTP ready one. This patch documents the binding needed to
do so.
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
---
Documentation/devicetree/bindings/net/mscc-ocelot.txt | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)
diff --git a/Documentation/devicetree/bindings/net/mscc-ocelot.txt b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
index 4d05a3b0f786..3b6290b45ce5 100644
--- a/Documentation/devicetree/bindings/net/mscc-ocelot.txt
+++ b/Documentation/devicetree/bindings/net/mscc-ocelot.txt
@@ -17,9 +17,10 @@ Required properties:
- "ana"
- "portX" with X from 0 to the number of last port index available on that
switch
-- interrupts: Should contain the switch interrupts for frame extraction and
- frame injection
-- interrupt-names: should contain the interrupt names: "xtr", "inj"
+- interrupts: Should contain the switch interrupts for frame extraction,
+ frame injection and PTP ready.
+- interrupt-names: should contain the interrupt names: "xtr", "inj". Can contain
+ "ptp_rdy" which is optional due to backward compatibility.
- ethernet-ports: A container for child nodes representing switch ports.
The ethernet-ports container has the following properties
@@ -63,8 +64,8 @@ Example:
"port2", "port3", "port4", "port5", "port6",
"port7", "port8", "port9", "port10", "qsys",
"ana";
- interrupts = <21 22>;
- interrupt-names = "xtr", "inj";
+ interrupts = <18 21 22>;
+ interrupt-names = "ptp_rdy", "xtr", "inj";
ethernet-ports {
#address-cells = <1>;
--
2.21.0
^ permalink raw reply related
* [PATCH net-next v5 0/6] net: mscc: PTP Hardware Clock (PHC) support
From: Antoine Tenart @ 2019-08-07 9:22 UTC (permalink / raw)
To: davem, richardcochran, alexandre.belloni, UNGLinuxDriver
Cc: Antoine Tenart, netdev, thomas.petazzoni, allan.nielsen
Hello,
This series introduces the PTP Hardware Clock (PHC) support to the Mscc
Ocelot switch driver. In order to make use of this, a new register bank
is added and described in the device tree, as well as a new interrupt.
The use of this bank and interrupt was made optional in the driver for dt
compatibility reasons.
Thanks!
Antoine
Since v4:
- Added SKBTX_IN_PROGRESS.
- Fixed two xmas trees.
- Rework the loop condition in ocelot_ptp_rdy_irq_handler.
Since v3:
- Fixed a spin_unlock_irqrestore issue.
Since v2:
- Prevented a possible infinite loop when reading the h/w
timestamps.
- s/GFP_KERNEL/GFP_ATOMIC/ in the Tx path.
- Set rx_filter to HWTSTAMP_FILTER_PTP_V2_EVENT at probe.
- Fixed s/w timestamping dependencies.
- Added Paul Burton's Acked-by on patches 2 and 4.
Since v1:
- Used list_for_each_safe() in ocelot_deinit().
- Fixed a memory leak in ocelot_deinit() by calling
dev_kfree_skb_any().
- Fixed a locking issue in get_hwtimestamp().
- Handled the NULL case of ptp_clock_register().
- Added comments on optional dt properties.
Antoine Tenart (6):
Documentation/bindings: net: ocelot: document the PTP bank
Documentation/bindings: net: ocelot: document the PTP ready IRQ
net: mscc: describe the PTP register range
net: mscc: improve the frame header parsing readability
net: mscc: remove the frame_info cpuq member
net: mscc: PTP Hardware Clock (PHC) support
.../devicetree/bindings/net/mscc-ocelot.txt | 20 +-
drivers/net/ethernet/mscc/ocelot.c | 396 +++++++++++++++++-
drivers/net/ethernet/mscc/ocelot.h | 49 ++-
drivers/net/ethernet/mscc/ocelot_board.c | 146 ++++++-
drivers/net/ethernet/mscc/ocelot_ptp.h | 41 ++
drivers/net/ethernet/mscc/ocelot_regs.c | 11 +
6 files changed, 629 insertions(+), 34 deletions(-)
create mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.h
--
2.21.0
^ permalink raw reply
* Re: [PATCH -next] iwlwifi: dbg: work around clang bug by marking debug strings static
From: Joe Perches @ 2019-08-07 9:18 UTC (permalink / raw)
To: Nathan Chancellor, Nick Desaulniers
Cc: Johannes Berg, Michael Ellerman, Kalle Valo, Luca Coelho,
Arnd Bergmann, Emmanuel Grumbach, Intel Linux Wireless,
David S. Miller, Shahar S Matityahu, Sara Sharon, linux-wireless,
netdev, LKML, clang-built-linux
In-Reply-To: <20190807051516.GA117639@archlinux-threadripper>
On Tue, 2019-08-06 at 22:15 -0700, Nathan Chancellor wrote:
> Just for everyone else (since I commented on our issue tracker), this is
> now fixed in Linus's tree as of commit 1f6607250331 ("iwlwifi: dbg_ini:
> fix compile time assert build errors").
I think this change is incomplete and suggest you add this
to remove the use of another const char * format.
---
drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 4d81776f576d..6b15e2e8cd37 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2593,20 +2593,20 @@ static void iwl_fw_dbg_update_regions(struct iwl_fw_runtime *fwrt,
{
void *iter = (void *)tlv->region_config;
int i, size = le32_to_cpu(tlv->num_regions);
- const char *err_st =
- "WRT: ext=%d. Invalid region %s %d for apply point %d\n";
for (i = 0; i < size; i++) {
struct iwl_fw_ini_region_cfg *reg = iter, **active;
int id = le32_to_cpu(reg->region_id);
u32 type = le32_to_cpu(reg->region_type);
- if (WARN(id >= ARRAY_SIZE(fwrt->dump.active_regs), err_st, ext,
- "id", id, pnt))
+ if (WARN(id >= ARRAY_SIZE(fwrt->dump.active_regs),
+ "WRT: ext=%d. Invalid region id %d for apply point %d\n",
+ ext, id, pnt))
break;
- if (WARN(type == 0 || type >= IWL_FW_INI_REGION_NUM, err_st,
- ext, "type", type, pnt))
+ if (WARN(type == 0 || type >= IWL_FW_INI_REGION_NUM,
+ "WRT: ext=%d. Invalid region type %d for apply point %d\n",
+ ext, type, pnt))
break;
active = &fwrt->dump.active_regs[id];
^ permalink raw reply related
* Re: [PATCH v2 1/1] net: bridge: use mac_len in bridge forwarding
From: Nikolay Aleksandrov @ 2019-08-07 9:17 UTC (permalink / raw)
To: Zahari Doychev, netdev
Cc: bridge, roopa, jhs, dsahern, simon.horman, makita.toshiaki,
xiyou.wangcong, jiri, ast, johannes
In-Reply-To: <20190805153740.29627-2-zahari.doychev@linux.com>
Hi Zahari,
On 05/08/2019 18:37, Zahari Doychev wrote:
> The bridge code cannot forward packets from various paths that set up the
> SKBs in different ways. Some of these packets get corrupted during the
> forwarding, as it is not always just ETH_HLEN that is pulled at the front. This
> happens e.g. when VLAN tags are pushed by using tc act_vlan on ingress.
Overall the patch looks good, I think it shouldn't introduce any regressions
at least from the codepaths I was able to inspect, but please include more
details in here from the cover letter. In fact, you don't need the cover letter;
just add all of the details here so we have them, especially the test setup. Also please provide
some details how this patch was tested. It'd be great if you could provide a
selftest for it so we can make sure it's considered when doing future changes.
Thank you,
Nik
>
> The problem is fixed by using skb->mac_len instead of ETH_HLEN, which makes
> sure that the skb headers are correctly restored. This usually does not
> change anything, except the local bridge transmits which now need to set
> the skb->mac_len correctly in br_dev_xmit, as well as the broken case noted
> above.
>
> Signed-off-by: Zahari Doychev <zahari.doychev@linux.com>
> ---
> net/bridge/br_device.c | 3 ++-
> net/bridge/br_forward.c | 4 ++--
> net/bridge/br_vlan.c | 3 ++-
> 3 files changed, 6 insertions(+), 4 deletions(-)
>
> diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
> index 681b72862c16..aeb77ff60311 100644
> --- a/net/bridge/br_device.c
> +++ b/net/bridge/br_device.c
> @@ -55,8 +55,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
> BR_INPUT_SKB_CB(skb)->frag_max_size = 0;
>
> skb_reset_mac_header(skb);
> + skb_reset_mac_len(skb);
> eth = eth_hdr(skb);
> - skb_pull(skb, ETH_HLEN);
> + skb_pull(skb, skb->mac_len);
>
> if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid))
> goto out;
> diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
> index 86637000f275..edb4f3533f05 100644
> --- a/net/bridge/br_forward.c
> +++ b/net/bridge/br_forward.c
> @@ -32,7 +32,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
>
> int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
> {
> - skb_push(skb, ETH_HLEN);
> + skb_push(skb, skb->mac_len);
> if (!is_skb_forwardable(skb->dev, skb))
> goto drop;
>
> @@ -94,7 +94,7 @@ static void __br_forward(const struct net_bridge_port *to,
> net = dev_net(indev);
> } else {
> if (unlikely(netpoll_tx_running(to->br->dev))) {
> - skb_push(skb, ETH_HLEN);
> + skb_push(skb, skb->mac_len);
> if (!is_skb_forwardable(skb->dev, skb))
> kfree_skb(skb);
> else
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 021cc9f66804..88244c9cc653 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -466,13 +466,14 @@ static bool __allowed_ingress(const struct net_bridge *br,
> /* Tagged frame */
> if (skb->vlan_proto != br->vlan_proto) {
> /* Protocol-mismatch, empty out vlan_tci for new tag */
> - skb_push(skb, ETH_HLEN);
> + skb_push(skb, skb->mac_len);
> skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto,
> skb_vlan_tag_get(skb));
> if (unlikely(!skb))
> return false;
>
> skb_pull(skb, ETH_HLEN);
> + skb_reset_network_header(skb);
> skb_reset_mac_len(skb);
> *vid = 0;
> tagged = false;
>
^ permalink raw reply
* Re: [PATCH v2 bpf-next 1/4] bpf: unprivileged BPF access via /dev/bpf
From: Lorenz Bauer @ 2019-08-07 9:03 UTC (permalink / raw)
To: Andy Lutomirski
Cc: Alexei Starovoitov, Song Liu, Kees Cook, Networking, bpf,
Alexei Starovoitov, Daniel Borkmann, Kernel Team, Jann Horn,
Greg KH, Linux API, LSM List
In-Reply-To: <CALCETrXEHL3+NAY6P6vUj7Pvd9ZpZsYC6VCLXOaNxb90a_POGw@mail.gmail.com>
On Wed, 7 Aug 2019 at 06:24, Andy Lutomirski <luto@kernel.org> wrote:
> a) Those that, by design, control privileged operations. This
> includes most attach calls, but it also includes allow_ptr_leaks,
> bpf_probe_read(), and quite a few other things. It also includes all
> of the by_id calls, I think, unless some clever modification to the
> way they worked would isolate different users' objects. I think that
> persistent objects can do pretty much everything that by_id users
> would need, so this isn't a big deal.
Slightly OT, since this is an implementation question: GET_MAP_FD_BY_ID
is useful to iterate a nested map. This isn't covered by rights to
persistent objects,
so it would need some thought.
--
Lorenz Bauer | Systems Engineer
6th Floor, County Hall/The Riverside Building, SE1 7PB, UK
www.cloudflare.com
^ permalink raw reply
* Re: [PATCH] net: stmmac: Fix the miscalculation of mapping from rxq to dma channel
From: Shaokun Zhang @ 2019-08-07 8:55 UTC (permalink / raw)
To: Jose Abreu, netdev@vger.kernel.org,
linux-stm32@st-md-mailman.stormreply.com
Cc: yuqi jin, Giuseppe Cavallaro, Alexandre Torgue, David S. Miller,
Maxime Coquelin
In-Reply-To: <BN8PR12MB3266D248C58DB7ABE6238A49D3D40@BN8PR12MB3266.namprd12.prod.outlook.com>
Hi Jose,
Thanks for your quick reply.
On 2019/8/7 16:24, Jose Abreu wrote:
> From: Shaokun Zhang <zhangshaokun@hisilicon.com>
> Date: Aug/07/2019, 09:17:29 (UTC+00:00)
>
>> From: yuqi jin <jinyuqi@huawei.com>
>>
>> XGMAC_MTL_RXQ_DMA_MAP1 will be configured if the number of queues is
>> greater than 3, but local variable chan will shift left more than 32-bits.
>> Let's fix this issue.
>
> This was already fixed in -net. Please see [1]
>
> [1] https://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next.git/commit/drivers/net/ethernet/stmicro/stmmac?id=e8df7e8c233a18d2704e37ecff47583b494789d3
>
> ---
> Thanks,
> Jose Miguel Abreu
>
>
^ permalink raw reply
* Re: [PATCH v3 10/41] media/ivtv: convert put_page() to put_user_page*()
From: Hans Verkuil @ 2019-08-07 8:51 UTC (permalink / raw)
To: john.hubbard, Andrew Morton
Cc: Christoph Hellwig, Dan Williams, Dave Chinner, Dave Hansen,
Ira Weiny, Jan Kara, Jason Gunthorpe, Jérôme Glisse,
LKML, amd-gfx, ceph-devel, devel, devel, dri-devel, intel-gfx,
kvm, linux-arm-kernel, linux-block, linux-crypto, linux-fbdev,
linux-fsdevel, linux-media, linux-mm, linux-nfs, linux-rdma,
linux-rpi-kernel, linux-xfs, netdev, rds-devel, sparclinux, x86,
xen-devel, John Hubbard, Andy Walls, Mauro Carvalho Chehab
In-Reply-To: <20190807013340.9706-11-jhubbard@nvidia.com>
On 8/7/19 3:33 AM, john.hubbard@gmail.com wrote:
> From: John Hubbard <jhubbard@nvidia.com>
>
> For pages that were retained via get_user_pages*(), release those pages
> via the new put_user_page*() routines, instead of via put_page() or
> release_pages().
>
> This is part of a tree-wide conversion, as described in commit fc1d8e7cca2d
> ("mm: introduce put_user_page*(), placeholder versions").
>
> Cc: Andy Walls <awalls@md.metrocast.net>
> Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
> Cc: linux-media@vger.kernel.org
> Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Regards,
Hans
> ---
> drivers/media/pci/ivtv/ivtv-udma.c | 14 ++++----------
> drivers/media/pci/ivtv/ivtv-yuv.c | 11 +++--------
> 2 files changed, 7 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c
> index 5f8883031c9c..7c7f33c2412b 100644
> --- a/drivers/media/pci/ivtv/ivtv-udma.c
> +++ b/drivers/media/pci/ivtv/ivtv-udma.c
> @@ -92,7 +92,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
> {
> struct ivtv_dma_page_info user_dma;
> struct ivtv_user_dma *dma = &itv->udma;
> - int i, err;
> + int err;
>
> IVTV_DEBUG_DMA("ivtv_udma_setup, dst: 0x%08x\n", (unsigned int)ivtv_dest_addr);
>
> @@ -119,8 +119,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
> IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
> err, user_dma.page_count);
> if (err >= 0) {
> - for (i = 0; i < err; i++)
> - put_page(dma->map[i]);
> + put_user_pages(dma->map, err);
> return -EINVAL;
> }
> return err;
> @@ -130,9 +129,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
>
> /* Fill SG List with new values */
> if (ivtv_udma_fill_sg_list(dma, &user_dma, 0) < 0) {
> - for (i = 0; i < dma->page_count; i++) {
> - put_page(dma->map[i]);
> - }
> + put_user_pages(dma->map, dma->page_count);
> dma->page_count = 0;
> return -ENOMEM;
> }
> @@ -153,7 +150,6 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
> void ivtv_udma_unmap(struct ivtv *itv)
> {
> struct ivtv_user_dma *dma = &itv->udma;
> - int i;
>
> IVTV_DEBUG_INFO("ivtv_unmap_user_dma\n");
>
> @@ -170,9 +166,7 @@ void ivtv_udma_unmap(struct ivtv *itv)
> ivtv_udma_sync_for_cpu(itv);
>
> /* Release User Pages */
> - for (i = 0; i < dma->page_count; i++) {
> - put_page(dma->map[i]);
> - }
> + put_user_pages(dma->map, dma->page_count);
> dma->page_count = 0;
> }
>
> diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c
> index cd2fe2d444c0..2c61a11d391d 100644
> --- a/drivers/media/pci/ivtv/ivtv-yuv.c
> +++ b/drivers/media/pci/ivtv/ivtv-yuv.c
> @@ -30,7 +30,6 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
> struct yuv_playback_info *yi = &itv->yuv_info;
> u8 frame = yi->draw_frame;
> struct yuv_frame_info *f = &yi->new_frame_info[frame];
> - int i;
> int y_pages, uv_pages;
> unsigned long y_buffer_offset, uv_buffer_offset;
> int y_decode_height, uv_decode_height, y_size;
> @@ -81,8 +80,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
> uv_pages, uv_dma.page_count);
>
> if (uv_pages >= 0) {
> - for (i = 0; i < uv_pages; i++)
> - put_page(dma->map[y_pages + i]);
> + put_user_pages(&dma->map[y_pages], uv_pages);
> rc = -EFAULT;
> } else {
> rc = uv_pages;
> @@ -93,8 +91,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
> y_pages, y_dma.page_count);
> }
> if (y_pages >= 0) {
> - for (i = 0; i < y_pages; i++)
> - put_page(dma->map[i]);
> + put_user_pages(dma->map, y_pages);
> /*
> * Inherit the -EFAULT from rc's
> * initialization, but allow it to be
> @@ -112,9 +109,7 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
> /* Fill & map SG List */
> if (ivtv_udma_fill_sg_list (dma, &uv_dma, ivtv_udma_fill_sg_list (dma, &y_dma, 0)) < 0) {
> IVTV_DEBUG_WARN("could not allocate bounce buffers for highmem userspace buffers\n");
> - for (i = 0; i < dma->page_count; i++) {
> - put_page(dma->map[i]);
> - }
> + put_user_pages(dma->map, dma->page_count);
> dma->page_count = 0;
> return -ENOMEM;
> }
>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox