Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 01/10] drop_monitor: Split tracing enable / disable to different functions
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Subsequent patches will need to enable / disable tracing based on the
configured alerting mode.

Reduce the nesting level and prepare for the introduction of this
functionality by splitting the tracing enable / disable operations into
two different functions.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/core/drop_monitor.c | 79 ++++++++++++++++++++++++++---------------
 1 file changed, 51 insertions(+), 28 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 4deb86f990f1..8b9b0b899ebc 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -241,11 +241,58 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
+{
+	int rc;
+
+	if (!try_module_get(THIS_MODULE)) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
+		return -ENODEV;
+	}
+
+	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
+		goto err_module_put;
+	}
+
+	rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
+		goto err_unregister_trace;
+	}
+
+	return 0;
+
+err_unregister_trace:
+	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+err_module_put:
+	module_put(THIS_MODULE);
+	return rc;
+}
+
+static void net_dm_trace_off_set(void)
+{
+	struct dm_hw_stat_delta *new_stat, *temp;
+
+	unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
+	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+
+	tracepoint_synchronize_unregister();
+
+	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
+		if (new_stat->dev == NULL) {
+			list_del_rcu(&new_stat->list);
+			kfree_rcu(new_stat, rcu);
+		}
+	}
+
+	module_put(THIS_MODULE);
+}
+
 static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 {
 	int rc = 0;
-	struct dm_hw_stat_delta *new_stat = NULL;
-	struct dm_hw_stat_delta *temp;
 
 	if (state == trace_state) {
 		NL_SET_ERR_MSG_MOD(extack, "Trace state already set to requested state");
@@ -254,34 +301,10 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 
 	switch (state) {
 	case TRACE_ON:
-		if (!try_module_get(THIS_MODULE)) {
-			NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
-			rc = -ENODEV;
-			break;
-		}
-
-		rc |= register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= register_trace_napi_poll(trace_napi_poll_hit, NULL);
+		rc = net_dm_trace_on_set(extack);
 		break;
-
 	case TRACE_OFF:
-		rc |= unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
-		rc |= unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-
-		tracepoint_synchronize_unregister();
-
-		/*
-		 * Clean the device list
-		 */
-		list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
-			if (new_stat->dev == NULL) {
-				list_del_rcu(&new_stat->list);
-				kfree_rcu(new_stat, rcu);
-			}
-		}
-
-		module_put(THIS_MODULE);
-
+		net_dm_trace_off_set();
 		break;
 	default:
 		rc = 1;
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 02/10] drop_monitor: Initialize timer and work item upon tracing enable
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

The timer and work item are currently initialized once during module
init, but subsequent patches will need to associate different functions
with the work item, based on the configured alert mode.

Allow subsequent patches to make that change by initializing and
de-initializing these objects during tracing enable and disable.

This also guarantees that once the request to disable tracing returns,
no more netlink notifications will be generated.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/core/drop_monitor.c | 24 +++++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 8b9b0b899ebc..b266dc1660ed 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -243,13 +243,20 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
-	int rc;
+	int cpu, rc;
 
 	if (!try_module_get(THIS_MODULE)) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
 		return -ENODEV;
 	}
 
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+
+		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		timer_setup(&data->send_timer, sched_send_work, 0);
+	}
+
 	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
@@ -274,12 +281,23 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 static void net_dm_trace_off_set(void)
 {
 	struct dm_hw_stat_delta *new_stat, *temp;
+	int cpu;
 
 	unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
 	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
 
 	tracepoint_synchronize_unregister();
 
+	/* Make sure we do not send notifications to user space after request
+	 * to stop tracing returns.
+	 */
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+
+		del_timer_sync(&data->send_timer);
+		cancel_work_sync(&data->dm_alert_work);
+	}
+
 	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
 		if (new_stat->dev == NULL) {
 			list_del_rcu(&new_stat->list);
@@ -481,14 +499,10 @@ static void exit_net_drop_monitor(void)
 	/*
 	 * Because of the module_get/put we do in the trace state change path
 	 * we are guarnateed not to have any current users when we get here
-	 * all we need to do is make sure that we don't have any running timers
-	 * or pending schedule calls
 	 */
 
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		del_timer_sync(&data->send_timer);
-		cancel_work_sync(&data->dm_alert_work);
 		/*
 		 * At this point, we should have exclusive access
 		 * to this struct and can free the skb inside it
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 04/10] drop_monitor: Require CAP_NET_ADMIN for drop monitor configuration
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Currently, the configure command does not do anything but return an
error. Subsequent patches will enable the command to change various
configuration options such as alert mode and packet truncation.

Similar to other netlink-based configuration channels, make sure only
users with the CAP_NET_ADMIN capability set can execute this command.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/core/drop_monitor.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 1cf4988de591..cd2f3069f34e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -409,6 +409,7 @@ static const struct genl_ops dropmon_ops[] = {
 		.cmd = NET_DM_CMD_CONFIG,
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_config,
+		.flags = GENL_ADMIN_PERM,
 	},
 	{
 		.cmd = NET_DM_CMD_START,
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 03/10] drop_monitor: Reset per-CPU data before starting to trace
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

The function reset_per_cpu_data() allocates and prepares a new skb for
the summary netlink alert message ('NET_DM_CMD_ALERT'). The new skb is
stored in the per-CPU 'data' variable and the old is returned.

The function is invoked during module initialization and from the
workqueue, before an alert is sent. This means that it is possible to
receive an alert with stale data, if we stopped tracing when the
hysteresis timer ('data->send_timer') was pending.

Instead of invoking the function during module initialization, invoke it
just before we start tracing and ensure we get a fresh skb.

This also allows us to remove the calls to initialize the timer and the
work item from the module initialization path, since both could have
been triggered by the error paths of reset_per_cpu_data().

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 net/core/drop_monitor.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b266dc1660ed..1cf4988de591 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -252,9 +252,16 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 
 	for_each_possible_cpu(cpu) {
 		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct sk_buff *skb;
 
 		INIT_WORK(&data->dm_alert_work, send_dm_alert);
 		timer_setup(&data->send_timer, sched_send_work, 0);
+		/* Allocate a new per-CPU skb for the summary alert message and
+		 * free the old one which might contain stale data from
+		 * previous tracing.
+		 */
+		skb = reset_per_cpu_data(data);
+		consume_skb(skb);
 	}
 
 	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
@@ -475,10 +482,7 @@ static int __init init_net_drop_monitor(void)
 
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
-		INIT_WORK(&data->dm_alert_work, send_dm_alert);
-		timer_setup(&data->send_timer, sched_send_work, 0);
 		spin_lock_init(&data->lock);
-		reset_per_cpu_data(data);
 	}
 
 	goto out;
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 06/10] drop_monitor: Add packet alert mode
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

So far drop monitor supported only one alert mode in which a summary of
locations in which packets were recently dropped was sent to user space.

This alert mode is sufficient in order to understand that packets were
dropped, but lacks information to perform a more detailed analysis.

Add a new alert mode in which the dropped packet itself is passed to
user space along with metadata: The drop location (as program counter
and resolved symbol), ingress netdevice and drop timestamp. More
metadata can be added in the future.

To avoid performing expensive operations in the context in which
kfree_skb() is invoked (can be hard IRQ), the dropped skb is cloned and
queued on per-CPU skb drop list. Then, in process context the netlink
message is allocated, prepared and finally sent to user space.

The per-CPU skb drop list is limited to 1000 skbs to prevent exhausting
the system's memory. Subsequent patches will make this limit
configurable and also add a counter that indicates how many skbs were
tail dropped.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  27 +++
 net/core/drop_monitor.c          | 280 ++++++++++++++++++++++++++++++-
 2 files changed, 305 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 0fecdedeb6ca..cfaaf75371b8 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -53,6 +53,7 @@ enum {
 	NET_DM_CMD_CONFIG,
 	NET_DM_CMD_START,
 	NET_DM_CMD_STOP,
+	NET_DM_CMD_PACKET_ALERT,
 	_NET_DM_CMD_MAX,
 };
 
@@ -63,12 +64,38 @@ enum {
  */
 #define NET_DM_GRP_ALERT 1
 
+enum net_dm_attr {
+	NET_DM_ATTR_UNSPEC,
+
+	NET_DM_ATTR_ALERT_MODE,			/* u8 */
+	NET_DM_ATTR_PC,				/* u64 */
+	NET_DM_ATTR_SYMBOL,			/* string */
+	NET_DM_ATTR_IN_PORT,			/* nested */
+	NET_DM_ATTR_TIMESTAMP,			/* struct timespec */
+	NET_DM_ATTR_PROTO,			/* u16 */
+	NET_DM_ATTR_PAYLOAD,			/* binary */
+	NET_DM_ATTR_PAD,
+
+	__NET_DM_ATTR_MAX,
+	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
+};
+
 /**
  * enum net_dm_alert_mode - Alert mode.
  * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ * @NET_DM_ALERT_MODE_PACKET: Each dropped packet is sent to user space along
+ *                            with metadata.
  */
 enum net_dm_alert_mode {
 	NET_DM_ALERT_MODE_SUMMARY,
+	NET_DM_ALERT_MODE_PACKET,
+};
+
+enum {
+	NET_DM_ATTR_PORT_NETDEV_IFINDEX,	/* u32 */
+
+	__NET_DM_ATTR_PORT_MAX,
+	NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
 };
 
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9cd2f662cb9e..ba765832413b 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -54,6 +54,7 @@ static DEFINE_MUTEX(net_dm_mutex);
 struct per_cpu_dm_data {
 	spinlock_t		lock;	/* Protects 'skb' and 'send_timer' */
 	struct sk_buff		*skb;
+	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
 };
@@ -85,6 +86,14 @@ struct net_dm_alert_ops {
 	void (*work_item_func)(struct work_struct *work);
 };
 
+struct net_dm_skb_cb {
+	void *pc;
+};
+
+#define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
+
+#define NET_DM_QUEUE_LEN 1000
+
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -257,8 +266,214 @@ static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
 	.work_item_func		= send_dm_alert,
 };
 
+static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
+					      struct sk_buff *skb,
+					      void *location)
+{
+	ktime_t tstamp = ktime_get_real();
+	struct per_cpu_dm_data *data;
+	struct sk_buff *nskb;
+	unsigned long flags;
+
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return;
+
+	NET_DM_SKB_CB(nskb)->pc = location;
+	/* Override the timestamp because we care about the time when the
+	 * packet was dropped.
+	 */
+	nskb->tstamp = tstamp;
+
+	data = this_cpu_ptr(&dm_cpu_data);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+		__skb_queue_tail(&data->drop_queue, nskb);
+	else
+		goto unlock_free;
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	schedule_work(&data->dm_alert_work);
+
+	return;
+
+unlock_free:
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+	consume_skb(nskb);
+}
+
+static void net_dm_packet_trace_napi_poll_hit(void *ignore,
+					      struct napi_struct *napi,
+					      int work, int budget)
+{
+}
+
+static size_t net_dm_in_port_size(void)
+{
+	       /* NET_DM_ATTR_IN_PORT nest */
+	return nla_total_size(0) +
+	       /* NET_DM_ATTR_PORT_NETDEV_IFINDEX */
+	       nla_total_size(sizeof(u32));
+}
+
+#define NET_DM_MAX_SYMBOL_LEN 40
+
+static size_t net_dm_packet_report_size(size_t payload_len)
+{
+	size_t size;
+
+	size = nlmsg_msg_size(GENL_HDRLEN + net_drop_monitor_family.hdrsize);
+
+	return NLMSG_ALIGN(size) +
+	       /* NET_DM_ATTR_PC */
+	       nla_total_size(sizeof(u64)) +
+	       /* NET_DM_ATTR_SYMBOL */
+	       nla_total_size(NET_DM_MAX_SYMBOL_LEN + 1) +
+	       /* NET_DM_ATTR_IN_PORT */
+	       net_dm_in_port_size() +
+	       /* NET_DM_ATTR_TIMESTAMP */
+	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_PROTO */
+	       nla_total_size(sizeof(u16)) +
+	       /* NET_DM_ATTR_PAYLOAD */
+	       nla_total_size(payload_len);
+}
+
+static int net_dm_packet_report_in_port_put(struct sk_buff *msg, int ifindex)
+{
+	struct nlattr *attr;
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_IN_PORT);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (ifindex &&
+	    nla_put_u32(msg, NET_DM_ATTR_PORT_NETDEV_IFINDEX, ifindex))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
+				     size_t payload_len)
+{
+	u64 pc = (u64)(uintptr_t) NET_DM_SKB_CB(skb)->pc;
+	char buf[NET_DM_MAX_SYMBOL_LEN];
+	struct nlattr *attr;
+	struct timespec ts;
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, 0, 0, &net_drop_monitor_family, 0,
+			  NET_DM_CMD_PACKET_ALERT);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_PC, pc, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	snprintf(buf, sizeof(buf), "%pS", NET_DM_SKB_CB(skb)->pc);
+	if (nla_put_string(msg, NET_DM_ATTR_SYMBOL, buf))
+		goto nla_put_failure;
+
+	rc = net_dm_packet_report_in_port_put(msg, skb->skb_iif);
+	if (rc)
+		goto nla_put_failure;
+
+	if (ktime_to_timespec_cond(skb->tstamp, &ts) &&
+	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
+		goto nla_put_failure;
+
+	if (!payload_len)
+		goto out;
+
+	if (nla_put_u16(msg, NET_DM_ATTR_PROTO, be16_to_cpu(skb->protocol)))
+		goto nla_put_failure;
+
+	attr = skb_put(msg, nla_total_size(payload_len));
+	attr->nla_type = NET_DM_ATTR_PAYLOAD;
+	attr->nla_len = nla_attr_size(payload_len);
+	if (skb_copy_bits(skb, 0, nla_data(attr), payload_len))
+		goto nla_put_failure;
+
+out:
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+#define NET_DM_MAX_PACKET_SIZE (0xffff - NLA_HDRLEN - NLA_ALIGNTO)
+
+static void net_dm_packet_report(struct sk_buff *skb)
+{
+	struct sk_buff *msg;
+	size_t payload_len;
+	int rc;
+
+	/* Make sure we start copying the packet from the MAC header */
+	if (skb->data > skb_mac_header(skb))
+		skb_push(skb, skb->data - skb_mac_header(skb));
+	else
+		skb_pull(skb, skb_mac_header(skb) - skb->data);
+
+	/* Ensure packet fits inside a single netlink attribute */
+	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+
+	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	rc = net_dm_packet_report_fill(msg, skb, payload_len);
+	if (rc) {
+		nlmsg_free(msg);
+		goto out;
+	}
+
+	genlmsg_multicast(&net_drop_monitor_family, msg, 0, 0, GFP_KERNEL);
+
+out:
+	consume_skb(skb);
+}
+
+static void net_dm_packet_work(struct work_struct *work)
+{
+	struct per_cpu_dm_data *data;
+	struct sk_buff_head list;
+	struct sk_buff *skb;
+	unsigned long flags;
+
+	data = container_of(work, struct per_cpu_dm_data, dm_alert_work);
+
+	__skb_queue_head_init(&list);
+
+	spin_lock_irqsave(&data->drop_queue.lock, flags);
+	skb_queue_splice_tail_init(&data->drop_queue, &list);
+	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+
+	while ((skb = __skb_dequeue(&list)))
+		net_dm_packet_report(skb);
+}
+
+static const struct net_dm_alert_ops net_dm_alert_packet_ops = {
+	.kfree_skb_probe	= net_dm_packet_trace_kfree_skb_hit,
+	.napi_poll_probe	= net_dm_packet_trace_napi_poll_hit,
+	.work_item_func		= net_dm_packet_work,
+};
+
 static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
 	[NET_DM_ALERT_MODE_SUMMARY]	= &net_dm_alert_summary_ops,
+	[NET_DM_ALERT_MODE_PACKET]	= &net_dm_alert_packet_ops,
 };
 
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
@@ -326,9 +541,12 @@ static void net_dm_trace_off_set(void)
 	 */
 	for_each_possible_cpu(cpu) {
 		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct sk_buff *skb;
 
 		del_timer_sync(&data->send_timer);
 		cancel_work_sync(&data->dm_alert_work);
+		while ((skb = __skb_dequeue(&data->drop_queue)))
+			consume_skb(skb);
 	}
 
 	list_for_each_entry_safe(new_stat, temp, &hw_stats_list, list) {
@@ -370,12 +588,61 @@ static int set_all_monitor_traces(int state, struct netlink_ext_ack *extack)
 	return rc;
 }
 
+static int net_dm_alert_mode_get_from_info(struct genl_info *info,
+					   enum net_dm_alert_mode *p_alert_mode)
+{
+	u8 val;
+
+	val = nla_get_u8(info->attrs[NET_DM_ATTR_ALERT_MODE]);
+
+	switch (val) {
+	case NET_DM_ALERT_MODE_SUMMARY: /* fall-through */
+	case NET_DM_ALERT_MODE_PACKET:
+		*p_alert_mode = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int net_dm_alert_mode_set(struct genl_info *info)
+{
+	struct netlink_ext_ack *extack = info->extack;
+	enum net_dm_alert_mode alert_mode;
+	int rc;
+
+	if (!info->attrs[NET_DM_ATTR_ALERT_MODE])
+		return 0;
+
+	rc = net_dm_alert_mode_get_from_info(info, &alert_mode);
+	if (rc) {
+		NL_SET_ERR_MSG_MOD(extack, "Invalid alert mode");
+		return -EINVAL;
+	}
+
+	net_dm_alert_mode = alert_mode;
+
+	return 0;
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
-	NL_SET_ERR_MSG_MOD(info->extack, "Command not supported");
+	struct netlink_ext_ack *extack = info->extack;
+	int rc;
 
-	return -EOPNOTSUPP;
+	if (trace_state == TRACE_ON) {
+		NL_SET_ERR_MSG_MOD(extack, "Cannot configure drop monitor while tracing is on");
+		return -EBUSY;
+	}
+
+	rc = net_dm_alert_mode_set(info);
+	if (rc)
+		return rc;
+
+	return 0;
 }
 
 static int net_dm_cmd_trace(struct sk_buff *skb,
@@ -430,6 +697,11 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 	return NOTIFY_DONE;
 }
 
+static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
+	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
+	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+};
+
 static const struct genl_ops dropmon_ops[] = {
 	{
 		.cmd = NET_DM_CMD_CONFIG,
@@ -467,6 +739,8 @@ static struct genl_family net_drop_monitor_family __ro_after_init = {
 	.hdrsize        = 0,
 	.name           = "NET_DM",
 	.version        = 2,
+	.maxattr	= NET_DM_ATTR_MAX,
+	.policy		= net_dm_nl_policy,
 	.pre_doit	= net_dm_nl_pre_doit,
 	.post_doit	= net_dm_nl_post_doit,
 	.module		= THIS_MODULE,
@@ -510,6 +784,7 @@ static int __init init_net_drop_monitor(void)
 	for_each_possible_cpu(cpu) {
 		data = &per_cpu(dm_cpu_data, cpu);
 		spin_lock_init(&data->lock);
+		skb_queue_head_init(&data->drop_queue);
 	}
 
 	goto out;
@@ -539,6 +814,7 @@ static void exit_net_drop_monitor(void)
 		 * to this struct and can free the skb inside it
 		 */
 		kfree_skb(data->skb);
+		WARN_ON(!skb_queue_empty(&data->drop_queue));
 	}
 
 	BUG_ON(genl_unregister_family(&net_drop_monitor_family));
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 05/10] drop_monitor: Add alert mode operations
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

The next patch is going to add another alert mode in which the dropped
packet is notified to user space, instead of only a summary of recent
drops.

Abstract the differences between the modes by adding alert mode
operations. The operations are selected based on the currently
configured mode and associated with the probes and the work item just
before tracing starts.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  9 ++++++++
 net/core/drop_monitor.c          | 38 +++++++++++++++++++++++++++-----
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 5edbd0a675fd..0fecdedeb6ca 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -62,4 +62,13 @@ enum {
  * Our group identifiers
  */
 #define NET_DM_GRP_ALERT 1
+
+/**
+ * enum net_dm_alert_mode - Alert mode.
+ * @NET_DM_ALERT_MODE_SUMMARY: A summary of recent drops is sent to user space.
+ */
+enum net_dm_alert_mode {
+	NET_DM_ALERT_MODE_SUMMARY,
+};
+
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index cd2f3069f34e..9cd2f662cb9e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -75,6 +75,16 @@ static int dm_delay = 1;
 static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
+static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+
+struct net_dm_alert_ops {
+	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
+				void *location);
+	void (*napi_poll_probe)(void *ignore, struct napi_struct *napi,
+				int work, int budget);
+	void (*work_item_func)(struct work_struct *work);
+};
+
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -241,10 +251,23 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi,
 	rcu_read_unlock();
 }
 
+static const struct net_dm_alert_ops net_dm_alert_summary_ops = {
+	.kfree_skb_probe	= trace_kfree_skb_hit,
+	.napi_poll_probe	= trace_napi_poll_hit,
+	.work_item_func		= send_dm_alert,
+};
+
+static const struct net_dm_alert_ops *net_dm_alert_ops_arr[] = {
+	[NET_DM_ALERT_MODE_SUMMARY]	= &net_dm_alert_summary_ops,
+};
+
 static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 {
+	const struct net_dm_alert_ops *ops;
 	int cpu, rc;
 
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
 	if (!try_module_get(THIS_MODULE)) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to take reference on module");
 		return -ENODEV;
@@ -254,7 +277,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
 		struct sk_buff *skb;
 
-		INIT_WORK(&data->dm_alert_work, send_dm_alert);
+		INIT_WORK(&data->dm_alert_work, ops->work_item_func);
 		timer_setup(&data->send_timer, sched_send_work, 0);
 		/* Allocate a new per-CPU skb for the summary alert message and
 		 * free the old one which might contain stale data from
@@ -264,13 +287,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 		consume_skb(skb);
 	}
 
-	rc = register_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	rc = register_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to kfree_skb() tracepoint");
 		goto err_module_put;
 	}
 
-	rc = register_trace_napi_poll(trace_napi_poll_hit, NULL);
+	rc = register_trace_napi_poll(ops->napi_poll_probe, NULL);
 	if (rc) {
 		NL_SET_ERR_MSG_MOD(extack, "Failed to connect probe to napi_poll() tracepoint");
 		goto err_unregister_trace;
@@ -279,7 +302,7 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 	return 0;
 
 err_unregister_trace:
-	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 err_module_put:
 	module_put(THIS_MODULE);
 	return rc;
@@ -288,10 +311,13 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 static void net_dm_trace_off_set(void)
 {
 	struct dm_hw_stat_delta *new_stat, *temp;
+	const struct net_dm_alert_ops *ops;
 	int cpu;
 
-	unregister_trace_napi_poll(trace_napi_poll_hit, NULL);
-	unregister_trace_kfree_skb(trace_kfree_skb_hit, NULL);
+	ops = net_dm_alert_ops_arr[net_dm_alert_mode];
+
+	unregister_trace_napi_poll(ops->napi_poll_probe, NULL);
+	unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 
 	tracepoint_synchronize_unregister();
 
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 07/10] drop_monitor: Allow truncation of dropped packets
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

When sending dropped packets to user space it is not always necessary to
copy the entire packet as usually only the headers are of interest.

Allow user to specify the truncation length and add the original length
of the packet as additional metadata to the netlink message.

By default no truncation is performed.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  2 ++
 net/core/drop_monitor.c          | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index cfaaf75371b8..5cd7eb1f66ba 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -75,6 +75,8 @@ enum net_dm_attr {
 	NET_DM_ATTR_PROTO,			/* u16 */
 	NET_DM_ATTR_PAYLOAD,			/* binary */
 	NET_DM_ATTR_PAD,
+	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
+	NET_DM_ATTR_ORIG_LEN,			/* u32 */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index ba765832413b..9f884adaa85f 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -77,6 +77,7 @@ static unsigned long dm_hw_check_delta = 2*HZ;
 static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
+static u32 net_dm_trunc_len;
 
 struct net_dm_alert_ops {
 	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -334,6 +335,8 @@ static size_t net_dm_packet_report_size(size_t payload_len)
 	       net_dm_in_port_size() +
 	       /* NET_DM_ATTR_TIMESTAMP */
 	       nla_total_size(sizeof(struct timespec)) +
+	       /* NET_DM_ATTR_ORIG_LEN */
+	       nla_total_size(sizeof(u32)) +
 	       /* NET_DM_ATTR_PROTO */
 	       nla_total_size(sizeof(u16)) +
 	       /* NET_DM_ATTR_PAYLOAD */
@@ -391,6 +394,9 @@ static int net_dm_packet_report_fill(struct sk_buff *msg, struct sk_buff *skb,
 	    nla_put(msg, NET_DM_ATTR_TIMESTAMP, sizeof(ts), &ts))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, NET_DM_ATTR_ORIG_LEN, skb->len))
+		goto nla_put_failure;
+
 	if (!payload_len)
 		goto out;
 
@@ -429,6 +435,8 @@ static void net_dm_packet_report(struct sk_buff *skb)
 
 	/* Ensure packet fits inside a single netlink attribute */
 	payload_len = min_t(size_t, skb->len, NET_DM_MAX_PACKET_SIZE);
+	if (net_dm_trunc_len)
+		payload_len = min_t(size_t, net_dm_trunc_len, payload_len);
 
 	msg = nlmsg_new(net_dm_packet_report_size(payload_len), GFP_KERNEL);
 	if (!msg)
@@ -627,6 +635,14 @@ static int net_dm_alert_mode_set(struct genl_info *info)
 	return 0;
 }
 
+static void net_dm_trunc_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_TRUNC_LEN])
+		return;
+
+	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
@@ -642,6 +658,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 	if (rc)
 		return rc;
 
+	net_dm_trunc_len_set(info);
+
 	return 0;
 }
 
@@ -700,6 +718,7 @@ static int dropmon_net_event(struct notifier_block *ev_block,
 static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
+	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dropmon_ops[] = {
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 08/10] drop_monitor: Add a command to query current configuration
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Users should be able to query the current configuration of drop monitor
before they start using it. Add a command to query the existing
configuration which currently consists of alert mode and packet
truncation length.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  2 ++
 net/core/drop_monitor.c          | 48 ++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 5cd7eb1f66ba..3b765a8428b5 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -54,6 +54,8 @@ enum {
 	NET_DM_CMD_START,
 	NET_DM_CMD_STOP,
 	NET_DM_CMD_PACKET_ALERT,
+	NET_DM_CMD_CONFIG_GET,
+	NET_DM_CMD_CONFIG_NEW,
 	_NET_DM_CMD_MAX,
 };
 
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 9f884adaa85f..135638474ab8 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -676,6 +676,50 @@ static int net_dm_cmd_trace(struct sk_buff *skb,
 	return -EOPNOTSUPP;
 }
 
+static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_CONFIG_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(msg, NET_DM_ATTR_ALERT_MODE, net_dm_alert_mode))
+		goto nla_put_failure;
+
+	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_config_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
 static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
@@ -738,6 +782,10 @@ static const struct genl_ops dropmon_ops[] = {
 		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
 		.doit = net_dm_cmd_trace,
 	},
+	{
+		.cmd = NET_DM_CMD_CONFIG_GET,
+		.doit = net_dm_cmd_config_get,
+	},
 };
 
 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 09/10] drop_monitor: Make drop queue length configurable
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

In packet alert mode, each CPU holds a list of dropped skbs that need to
be processed in process context and sent to user space. To avoid
exhausting the system's memory the maximum length of this queue is
currently set to 1000.

Allow users to tune the length of this queue according to their needs.
The configured length is reported to user space when drop monitor
configuration is queried.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  1 +
 net/core/drop_monitor.c          | 19 ++++++++++++++++---
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 3b765a8428b5..1d0bdb1ba954 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -79,6 +79,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_PAD,
 	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
 	NET_DM_ATTR_ORIG_LEN,			/* u32 */
+	NET_DM_ATTR_QUEUE_LEN,			/* u32 */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index 135638474ab8..eb3c34d69ea9 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -78,6 +78,7 @@ static LIST_HEAD(hw_stats_list);
 
 static enum net_dm_alert_mode net_dm_alert_mode = NET_DM_ALERT_MODE_SUMMARY;
 static u32 net_dm_trunc_len;
+static u32 net_dm_queue_len = 1000;
 
 struct net_dm_alert_ops {
 	void (*kfree_skb_probe)(void *ignore, struct sk_buff *skb,
@@ -93,8 +94,6 @@ struct net_dm_skb_cb {
 
 #define NET_DM_SKB_CB(__skb) ((struct net_dm_skb_cb *)&((__skb)->cb[0]))
 
-#define NET_DM_QUEUE_LEN 1000
-
 static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data)
 {
 	size_t al;
@@ -289,7 +288,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
 	data = this_cpu_ptr(&dm_cpu_data);
 
 	spin_lock_irqsave(&data->drop_queue.lock, flags);
-	if (skb_queue_len(&data->drop_queue) < NET_DM_QUEUE_LEN)
+	if (skb_queue_len(&data->drop_queue) < net_dm_queue_len)
 		__skb_queue_tail(&data->drop_queue, nskb);
 	else
 		goto unlock_free;
@@ -643,6 +642,14 @@ static void net_dm_trunc_len_set(struct genl_info *info)
 	net_dm_trunc_len = nla_get_u32(info->attrs[NET_DM_ATTR_TRUNC_LEN]);
 }
 
+static void net_dm_queue_len_set(struct genl_info *info)
+{
+	if (!info->attrs[NET_DM_ATTR_QUEUE_LEN])
+		return;
+
+	net_dm_queue_len = nla_get_u32(info->attrs[NET_DM_ATTR_QUEUE_LEN]);
+}
+
 static int net_dm_cmd_config(struct sk_buff *skb,
 			struct genl_info *info)
 {
@@ -660,6 +667,8 @@ static int net_dm_cmd_config(struct sk_buff *skb,
 
 	net_dm_trunc_len_set(info);
 
+	net_dm_queue_len_set(info);
+
 	return 0;
 }
 
@@ -691,6 +700,9 @@ static int net_dm_config_fill(struct sk_buff *msg, struct genl_info *info)
 	if (nla_put_u32(msg, NET_DM_ATTR_TRUNC_LEN, net_dm_trunc_len))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, NET_DM_ATTR_QUEUE_LEN, net_dm_queue_len))
+		goto nla_put_failure;
+
 	genlmsg_end(msg, hdr);
 
 	return 0;
@@ -763,6 +775,7 @@ static const struct nla_policy net_dm_nl_policy[NET_DM_ATTR_MAX + 1] = {
 	[NET_DM_ATTR_UNSPEC] = { .strict_start_type = NET_DM_ATTR_UNSPEC + 1 },
 	[NET_DM_ATTR_ALERT_MODE] = { .type = NLA_U8 },
 	[NET_DM_ATTR_TRUNC_LEN] = { .type = NLA_U32 },
+	[NET_DM_ATTR_QUEUE_LEN] = { .type = NLA_U32 },
 };
 
 static const struct genl_ops dropmon_ops[] = {
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next v2 10/10] drop_monitor: Expose tail drop counter
From: Ido Schimmel @ 2019-08-11  7:35 UTC (permalink / raw)
  To: netdev
  Cc: davem, nhorman, jiri, toke, dsahern, roopa, nikolay,
	jakub.kicinski, andy, f.fainelli, andrew, vivien.didelot, mlxsw,
	Ido Schimmel
In-Reply-To: <20190811073555.27068-1-idosch@idosch.org>

From: Ido Schimmel <idosch@mellanox.com>

Previous patch made the length of the per-CPU skb drop list
configurable. Expose a counter that shows how many packets could not be
enqueued to this list.

This allows users determine the desired queue length.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 include/uapi/linux/net_dropmon.h |  10 +++
 net/core/drop_monitor.c          | 101 +++++++++++++++++++++++++++++++
 2 files changed, 111 insertions(+)

diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h
index 1d0bdb1ba954..405b31cbf723 100644
--- a/include/uapi/linux/net_dropmon.h
+++ b/include/uapi/linux/net_dropmon.h
@@ -56,6 +56,8 @@ enum {
 	NET_DM_CMD_PACKET_ALERT,
 	NET_DM_CMD_CONFIG_GET,
 	NET_DM_CMD_CONFIG_NEW,
+	NET_DM_CMD_STATS_GET,
+	NET_DM_CMD_STATS_NEW,
 	_NET_DM_CMD_MAX,
 };
 
@@ -80,6 +82,7 @@ enum net_dm_attr {
 	NET_DM_ATTR_TRUNC_LEN,			/* u32 */
 	NET_DM_ATTR_ORIG_LEN,			/* u32 */
 	NET_DM_ATTR_QUEUE_LEN,			/* u32 */
+	NET_DM_ATTR_STATS,			/* nested */
 
 	__NET_DM_ATTR_MAX,
 	NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1
@@ -103,4 +106,11 @@ enum {
 	NET_DM_ATTR_PORT_MAX = __NET_DM_ATTR_PORT_MAX - 1
 };
 
+enum {
+	NET_DM_ATTR_STATS_DROPPED,		/* u64 */
+
+	__NET_DM_ATTR_STATS_MAX,
+	NET_DM_ATTR_STATS_MAX = __NET_DM_ATTR_STATS_MAX - 1
+};
+
 #endif
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index eb3c34d69ea9..39e094907391 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -51,12 +51,18 @@ static int trace_state = TRACE_OFF;
  */
 static DEFINE_MUTEX(net_dm_mutex);
 
+struct net_dm_stats {
+	u64 dropped;
+	struct u64_stats_sync syncp;
+};
+
 struct per_cpu_dm_data {
 	spinlock_t		lock;	/* Protects 'skb' and 'send_timer' */
 	struct sk_buff		*skb;
 	struct sk_buff_head	drop_queue;
 	struct work_struct	dm_alert_work;
 	struct timer_list	send_timer;
+	struct net_dm_stats	stats;
 };
 
 struct dm_hw_stat_delta {
@@ -300,6 +306,9 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
 
 unlock_free:
 	spin_unlock_irqrestore(&data->drop_queue.lock, flags);
+	u64_stats_update_begin(&data->stats.syncp);
+	data->stats.dropped++;
+	u64_stats_update_end(&data->stats.syncp);
 	consume_skb(nskb);
 }
 
@@ -732,6 +741,93 @@ static int net_dm_cmd_config_get(struct sk_buff *skb, struct genl_info *info)
 	return rc;
 }
 
+static void net_dm_stats_read(struct net_dm_stats *stats)
+{
+	int cpu;
+
+	memset(stats, 0, sizeof(*stats));
+	for_each_possible_cpu(cpu) {
+		struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+		struct net_dm_stats *cpu_stats = &data->stats;
+		unsigned int start;
+		u64 dropped;
+
+		do {
+			start = u64_stats_fetch_begin_irq(&cpu_stats->syncp);
+			dropped = cpu_stats->dropped;
+		} while (u64_stats_fetch_retry_irq(&cpu_stats->syncp, start));
+
+		stats->dropped += dropped;
+	}
+}
+
+static int net_dm_stats_put(struct sk_buff *msg)
+{
+	struct net_dm_stats stats;
+	struct nlattr *attr;
+
+	net_dm_stats_read(&stats);
+
+	attr = nla_nest_start(msg, NET_DM_ATTR_STATS);
+	if (!attr)
+		return -EMSGSIZE;
+
+	if (nla_put_u64_64bit(msg, NET_DM_ATTR_STATS_DROPPED,
+			      stats.dropped, NET_DM_ATTR_PAD))
+		goto nla_put_failure;
+
+	nla_nest_end(msg, attr);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(msg, attr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_stats_fill(struct sk_buff *msg, struct genl_info *info)
+{
+	void *hdr;
+	int rc;
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_drop_monitor_family, 0, NET_DM_CMD_STATS_NEW);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	rc = net_dm_stats_put(msg);
+	if (rc)
+		goto nla_put_failure;
+
+	genlmsg_end(msg, hdr);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
+}
+
+static int net_dm_cmd_stats_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	int rc;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	rc = net_dm_stats_fill(msg, info);
+	if (rc)
+		goto free_msg;
+
+	return genlmsg_reply(msg, info);
+
+free_msg:
+	nlmsg_free(msg);
+	return rc;
+}
+
 static int dropmon_net_event(struct notifier_block *ev_block,
 			     unsigned long event, void *ptr)
 {
@@ -799,6 +895,10 @@ static const struct genl_ops dropmon_ops[] = {
 		.cmd = NET_DM_CMD_CONFIG_GET,
 		.doit = net_dm_cmd_config_get,
 	},
+	{
+		.cmd = NET_DM_CMD_STATS_GET,
+		.doit = net_dm_cmd_stats_get,
+	},
 };
 
 static int net_dm_nl_pre_doit(const struct genl_ops *ops,
@@ -865,6 +965,7 @@ static int __init init_net_drop_monitor(void)
 		data = &per_cpu(dm_cpu_data, cpu);
 		spin_lock_init(&data->lock);
 		skb_queue_head_init(&data->drop_queue);
+		u64_stats_init(&data->stats.syncp);
 	}
 
 	goto out;
-- 
2.21.0


^ permalink raw reply related

* [PATCH net] mlxsw: spectrum_ptp: Keep unmatched entries in a linked list
From: Ido Schimmel @ 2019-08-11  7:48 UTC (permalink / raw)
  To: netdev; +Cc: davem, jiri, petrm, richardcochran, mlxsw, Ido Schimmel

From: Petr Machata <petrm@mellanox.com>

To identify timestamps for matching with their packets, Spectrum-1 uses a
five-tuple of (port, direction, domain number, message type, sequence ID).
If there are several clients from the same domain behind a single port
sending Delay_Req's, the only thing differentiating these packets, as far
as Spectrum-1 is concerned, is the sequence ID. Should sequence IDs between
individual clients be similar, conflicts may arise. That is not a problem
to hardware, which will simply deliver timestamps on a first comes, first
served basis.

However the driver uses a simple hash table to store the unmatched pieces.
When a new conflicting piece arrives, it pushes out the previously stored
one, which if it is a packet, is delivered without timestamp. Later on as
the corresponding timestamps arrive, the first one is mismatched to the
second packet, and the second one is never matched and eventually is GCd.

To correct this issue, instead of using a simple rhashtable, use rhltable
to keep the unmatched entries.

Previously, a found unmatched entry would always be removed from the hash
table. That is not the case anymore--an incompatible entry is left in the
hash table. Therefore removal from the hash table cannot be used to confirm
the validity of the looked-up pointer, instead the lookup would simply need
to be redone. Therefore move it inside the critical section. This
simplifies a lot of the code.

Fixes: 8748642751ed ("mlxsw: spectrum: PTP: Support SIOCGHWTSTAMP, SIOCSHWTSTAMP ioctls")
Reported-by: Alex Veber <alexve@mellanox.com>
Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
---
 .../ethernet/mellanox/mlxsw/spectrum_ptp.c    | 138 +++++++-----------
 1 file changed, 55 insertions(+), 83 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
index 63b07edd9d81..38bb1cfe4e8c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ptp.c
@@ -29,7 +29,7 @@
 
 struct mlxsw_sp_ptp_state {
 	struct mlxsw_sp *mlxsw_sp;
-	struct rhashtable unmatched_ht;
+	struct rhltable unmatched_ht;
 	spinlock_t unmatched_lock; /* protects the HT */
 	struct delayed_work ht_gc_dw;
 	u32 gc_cycle;
@@ -45,7 +45,7 @@ struct mlxsw_sp1_ptp_key {
 
 struct mlxsw_sp1_ptp_unmatched {
 	struct mlxsw_sp1_ptp_key key;
-	struct rhash_head ht_node;
+	struct rhlist_head ht_node;
 	struct rcu_head rcu;
 	struct sk_buff *skb;
 	u64 timestamp;
@@ -359,7 +359,7 @@ static int mlxsw_sp_ptp_parse(struct sk_buff *skb,
 /* Returns NULL on successful insertion, a pointer on conflict, or an ERR_PTR on
  * error.
  */
-static struct mlxsw_sp1_ptp_unmatched *
+static int
 mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
 			     struct mlxsw_sp1_ptp_key key,
 			     struct sk_buff *skb,
@@ -368,41 +368,51 @@ mlxsw_sp1_ptp_unmatched_save(struct mlxsw_sp *mlxsw_sp,
 	int cycles = MLXSW_SP1_PTP_HT_GC_TIMEOUT / MLXSW_SP1_PTP_HT_GC_INTERVAL;
 	struct mlxsw_sp_ptp_state *ptp_state = mlxsw_sp->ptp_state;
 	struct mlxsw_sp1_ptp_unmatched *unmatched;
-	struct mlxsw_sp1_ptp_unmatched *conflict;
+	int err;
 
 	unmatched = kzalloc(sizeof(*unmatched), GFP_ATOMIC);
 	if (!unmatched)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	unmatched->key = key;
 	unmatched->skb = skb;
 	unmatched->timestamp = timestamp;
 	unmatched->gc_cycle = mlxsw_sp->ptp_state->gc_cycle + cycles;
 
-	conflict = rhashtable_lookup_get_insert_fast(&ptp_state->unmatched_ht,
-					    &unmatched->ht_node,
-					    mlxsw_sp1_ptp_unmatched_ht_params);
-	if (conflict)
+	err = rhltable_insert(&ptp_state->unmatched_ht, &unmatched->ht_node,
+			      mlxsw_sp1_ptp_unmatched_ht_params);
+	if (err)
 		kfree(unmatched);
 
-	return conflict;
+	return err;
 }
 
 static struct mlxsw_sp1_ptp_unmatched *
 mlxsw_sp1_ptp_unmatched_lookup(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp1_ptp_key key)
+			       struct mlxsw_sp1_ptp_key key, int *p_length)
 {
-	return rhashtable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key,
-				 mlxsw_sp1_ptp_unmatched_ht_params);
+	struct mlxsw_sp1_ptp_unmatched *unmatched, *last = NULL;
+	struct rhlist_head *tmp, *list;
+	int length = 0;
+
+	list = rhltable_lookup(&mlxsw_sp->ptp_state->unmatched_ht, &key,
+			       mlxsw_sp1_ptp_unmatched_ht_params);
+	rhl_for_each_entry_rcu(unmatched, tmp, list, ht_node) {
+		last = unmatched;
+		length++;
+	}
+
+	*p_length = length;
+	return last;
 }
 
 static int
 mlxsw_sp1_ptp_unmatched_remove(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp1_ptp_unmatched *unmatched)
 {
-	return rhashtable_remove_fast(&mlxsw_sp->ptp_state->unmatched_ht,
-				      &unmatched->ht_node,
-				      mlxsw_sp1_ptp_unmatched_ht_params);
+	return rhltable_remove(&mlxsw_sp->ptp_state->unmatched_ht,
+			       &unmatched->ht_node,
+			       mlxsw_sp1_ptp_unmatched_ht_params);
 }
 
 /* This function is called in the following scenarios:
@@ -489,75 +499,38 @@ static void mlxsw_sp1_ptp_got_piece(struct mlxsw_sp *mlxsw_sp,
 				    struct mlxsw_sp1_ptp_key key,
 				    struct sk_buff *skb, u64 timestamp)
 {
-	struct mlxsw_sp1_ptp_unmatched *unmatched, *conflict;
+	struct mlxsw_sp1_ptp_unmatched *unmatched;
+	int length;
 	int err;
 
 	rcu_read_lock();
 
-	unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key);
-
 	spin_lock(&mlxsw_sp->ptp_state->unmatched_lock);
 
-	if (unmatched) {
-		/* There was an unmatched entry when we looked, but it may have
-		 * been removed before we took the lock.
-		 */
-		err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched);
-		if (err)
-			unmatched = NULL;
-	}
-
-	if (!unmatched) {
-		/* We have no unmatched entry, but one may have been added after
-		 * we looked, but before we took the lock.
-		 */
-		unmatched = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
-							 skb, timestamp);
-		if (IS_ERR(unmatched)) {
-			if (skb)
-				mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
-							    key.local_port,
-							    key.ingress, NULL);
-			unmatched = NULL;
-		} else if (unmatched) {
-			/* Save just told us, under lock, that the entry is
-			 * there, so this has to work.
-			 */
-			err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp,
-							     unmatched);
-			WARN_ON_ONCE(err);
-		}
-	}
-
-	/* If unmatched is non-NULL here, it comes either from the lookup, or
-	 * from the save attempt above. In either case the entry was removed
-	 * from the hash table. If unmatched is NULL, a new unmatched entry was
-	 * added to the hash table, and there was no conflict.
-	 */
-
+	unmatched = mlxsw_sp1_ptp_unmatched_lookup(mlxsw_sp, key, &length);
 	if (skb && unmatched && unmatched->timestamp) {
 		unmatched->skb = skb;
 	} else if (timestamp && unmatched && unmatched->skb) {
 		unmatched->timestamp = timestamp;
-	} else if (unmatched) {
-		/* unmatched holds an older entry of the same type: either an
-		 * skb if we are handling skb, or a timestamp if we are handling
-		 * timestamp. We can't match that up, so save what we have.
+	} else {
+		/* Either there is no entry to match, or one that is there is
+		 * incompatible.
 		 */
-		conflict = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
-							skb, timestamp);
-		if (IS_ERR(conflict)) {
-			if (skb)
-				mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
-							    key.local_port,
-							    key.ingress, NULL);
-		} else {
-			/* Above, we removed an object with this key from the
-			 * hash table, under lock, so conflict can not be a
-			 * valid pointer.
-			 */
-			WARN_ON_ONCE(conflict);
-		}
+		if (length < 100)
+			err = mlxsw_sp1_ptp_unmatched_save(mlxsw_sp, key,
+							   skb, timestamp);
+		else
+			err = -E2BIG;
+		if (err && skb)
+			mlxsw_sp1_ptp_packet_finish(mlxsw_sp, skb,
+						    key.local_port,
+						    key.ingress, NULL);
+		unmatched = NULL;
+	}
+
+	if (unmatched) {
+		err = mlxsw_sp1_ptp_unmatched_remove(mlxsw_sp, unmatched);
+		WARN_ON_ONCE(err);
 	}
 
 	spin_unlock(&mlxsw_sp->ptp_state->unmatched_lock);
@@ -669,9 +642,8 @@ mlxsw_sp1_ptp_ht_gc_collect(struct mlxsw_sp_ptp_state *ptp_state,
 	local_bh_disable();
 
 	spin_lock(&ptp_state->unmatched_lock);
-	err = rhashtable_remove_fast(&ptp_state->unmatched_ht,
-				     &unmatched->ht_node,
-				     mlxsw_sp1_ptp_unmatched_ht_params);
+	err = rhltable_remove(&ptp_state->unmatched_ht, &unmatched->ht_node,
+			      mlxsw_sp1_ptp_unmatched_ht_params);
 	spin_unlock(&ptp_state->unmatched_lock);
 
 	if (err)
@@ -702,7 +674,7 @@ static void mlxsw_sp1_ptp_ht_gc(struct work_struct *work)
 	ptp_state = container_of(dwork, struct mlxsw_sp_ptp_state, ht_gc_dw);
 	gc_cycle = ptp_state->gc_cycle++;
 
-	rhashtable_walk_enter(&ptp_state->unmatched_ht, &iter);
+	rhltable_walk_enter(&ptp_state->unmatched_ht, &iter);
 	rhashtable_walk_start(&iter);
 	while ((obj = rhashtable_walk_next(&iter))) {
 		if (IS_ERR(obj))
@@ -855,8 +827,8 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
 
 	spin_lock_init(&ptp_state->unmatched_lock);
 
-	err = rhashtable_init(&ptp_state->unmatched_ht,
-			      &mlxsw_sp1_ptp_unmatched_ht_params);
+	err = rhltable_init(&ptp_state->unmatched_ht,
+			    &mlxsw_sp1_ptp_unmatched_ht_params);
 	if (err)
 		goto err_hashtable_init;
 
@@ -891,7 +863,7 @@ struct mlxsw_sp_ptp_state *mlxsw_sp1_ptp_init(struct mlxsw_sp *mlxsw_sp)
 err_mtptpt1_set:
 	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
 err_mtptpt_set:
-	rhashtable_destroy(&ptp_state->unmatched_ht);
+	rhltable_destroy(&ptp_state->unmatched_ht);
 err_hashtable_init:
 	kfree(ptp_state);
 	return ERR_PTR(err);
@@ -906,8 +878,8 @@ void mlxsw_sp1_ptp_fini(struct mlxsw_sp_ptp_state *ptp_state)
 	mlxsw_sp1_ptp_set_fifo_clr_on_trap(mlxsw_sp, false);
 	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP1, 0);
 	mlxsw_sp_ptp_mtptpt_set(mlxsw_sp, MLXSW_REG_MTPTPT_TRAP_ID_PTP0, 0);
-	rhashtable_free_and_destroy(&ptp_state->unmatched_ht,
-				    &mlxsw_sp1_ptp_unmatched_free_fn, NULL);
+	rhltable_free_and_destroy(&ptp_state->unmatched_ht,
+				  &mlxsw_sp1_ptp_unmatched_free_fn, NULL);
 	kfree(ptp_state);
 }
 
-- 
2.21.0


^ permalink raw reply related

* Re: [PATCH net-next v2 1/1] net: dsa: fix fixed-link port registration
From: Sergei Shtylyov @ 2019-08-11  9:54 UTC (permalink / raw)
  To: Marek Behún, netdev
  Cc: Heiner Kallweit, Sebastian Reichel, Vivien Didelot, Andrew Lunn,
	Florian Fainelli, David S . Miller
In-Reply-To: <20190811031857.2899-1-marek.behun@nic.cz>

Hello!

    Just noticed a comment typo...

On 11.08.2019 6:18, Marek Behún wrote:

> Commit 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in
> genphy_read_status") broke fixed link DSA port registration in
> dsa_port_fixed_link_register_of: the genphy_read_status does not do what
> it is supposed to and the following adjust_link is given wrong
> parameters.
> 
> This causes a regression on Turris Omnia, where the mvneta driver for
> the interface connected to the switch reports crc errors, for some
> reason.
> 
> I realize this fix is not ideal, something else could change in genphy
> functions which could cause DSA fixed-link port to break again.
> Hopefully DSA fixed-link port functionality will be converted to phylink
> API soon.
> 
> Signed-off-by: Marek Behún <marek.behun@nic.cz>
> Cc: Heiner Kallweit <hkallweit1@gmail.com>
> Cc: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
> Cc: Vivien Didelot <vivien.didelot@gmail.com>
> Cc: Andrew Lunn <andrew@lunn.ch>
> Cc: Florian Fainelli <f.fainelli@gmail.com>
> Cc: David S. Miller <davem@davemloft.net>
> ---
>   net/dsa/port.c | 11 +++++++++++
>   1 file changed, 11 insertions(+)
> 
> diff --git a/net/dsa/port.c b/net/dsa/port.c
> index 363eab6df51b..c424ebb373e1 100644
> --- a/net/dsa/port.c
> +++ b/net/dsa/port.c
> @@ -485,6 +485,17 @@ static int dsa_port_fixed_link_register_of(struct dsa_port *dp)
>   	phydev->interface = mode;
>   
>   	genphy_config_init(phydev);
> +
> +	/*
> +	 * Commit 88d6272acaaa caused genphy_read_status not to do it's work if
                                                                    ^^^^ its.

> +	 * autonegotiation is enabled and link status did not change. This is
> +	 * the case for fixed_phy. By setting phydev->link = 0 before the call
> +	 * to genphy_read_status we force it to read and fill in the parameters.
> +	 *
> +	 * Hopefully this dirty hack will be removed soon by converting DSA
> +	 * fixed link ports to phylink API.
> +	 */
> +	phydev->link = 0;
>   	genphy_read_status(phydev);
>   
>   	if (ds->ops->adjust_link)

MBR, Sergei


^ permalink raw reply

* Re: [PATCH 11/11] treewide: remove dummy Makefiles for single targets
From: Leon Romanovsky @ 2019-08-11  9:55 UTC (permalink / raw)
  To: Masahiro Yamada
  Cc: linux-kbuild@vger.kernel.org, Christoph Hellwig, Sam Ravnborg,
	Alexei Starovoitov, Boris Pismenny, Daniel Borkmann,
	David S. Miller, Igor Russkikh, Jakub Kicinski, Martin KaFai Lau,
	Saeed Mahameed, Song Liu, Yonghong Song, bpf@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-rdma@vger.kernel.org,
	netdev@vger.kernel.org, oss-drivers@netronome.com
In-Reply-To: <20190810155307.29322-12-yamada.masahiro@socionext.com>

On Sun, Aug 11, 2019 at 12:53:07AM +0900, Masahiro Yamada wrote:
> Now that the single target build descends into sub-directories
> in the same ways as the normal build, these dummy Makefiles
> are not needed any more.
>
> Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
> ---
>

It is hard to test/review/ack on this patch without seeing previous
patches, especially patch #10 where you changed logic of single targets.

Thanks

^ permalink raw reply

* Re: [PATCH net-next] net: openvswitch: Set OvS recirc_id from tc chain index
From: Paul Blakey @ 2019-08-11 10:46 UTC (permalink / raw)
  To: Pravin Shelar
  Cc: Linux Kernel Network Developers, David S. Miller, Justin Pettit,
	Simon Horman, Marcelo Ricardo Leitner, Vlad Buslov, Jiri Pirko,
	Roi Dayan, Yossi Kuperman, Rony Efraim, Oz Shlomo
In-Reply-To: <CAOrHB_DhfiQy8RwTiwgn9ZXgsd5j2f0ynZPUP4wf-xzhjwo8kg@mail.gmail.com>


On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>> Offloaded OvS datapath rules are translated one to one to tc rules,
>> for example the following simplified OvS rule:
>>
>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>
>> Will be translated to the following tc rule:
>>
>> $ tc filter add dev dev1 ingress \
>>              prio 1 chain 0 proto ip \
>>                  flower tcp ct_state -trk \
>>                  action ct pipe \
>>                  action goto chain 2
>>
>> Received packets will first travel though tc, and if they aren't stolen
>> by it, like in the above rule, they will continue to OvS datapath.
>> Since we already did some actions (action ct in this case) which might
>> modify the packets, and updated action stats, we would like to continue
>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>> where we left off.
>>
>> To support this, introduce a new skb extension for tc, which
>> will be used for translating tc chain to ovs recirc_id to
>> handle these miss cases. Last tc chain index will be set
>> by tc goto chain action and read by OvS datapath.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>> Acked-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>   include/linux/skbuff.h    | 13 +++++++++++++
>>   include/net/sch_generic.h |  5 ++++-
>>   net/core/skbuff.c         |  6 ++++++
>>   net/openvswitch/flow.c    |  9 +++++++++
>>   net/sched/Kconfig         | 13 +++++++++++++
>>   net/sched/act_api.c       |  1 +
>>   net/sched/cls_api.c       | 12 ++++++++++++
>>   7 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 3aef8d8..fb2a792 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>   };
>>   #endif
>>
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>> + * ovs recirc_id. It will be set to the current chain by tc
>> + * and read by ovs to recirc_id.
>> + */
>> +struct tc_skb_ext {
>> +       __u32 chain;
>> +};
>> +#endif
>> +
>>   struct sk_buff_head {
>>          /* These two members must be first. */
>>          struct sk_buff  *next;
>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>   #ifdef CONFIG_XFRM
>>          SKB_EXT_SEC_PATH,
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       TC_SKB_EXT,
>> +#endif
>>          SKB_EXT_NUM, /* must be last */
>>   };
>>
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index 6b6b012..871feea 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -275,7 +275,10 @@ struct tcf_result {
>>                          unsigned long   class;
>>                          u32             classid;
>>                  };
>> -               const struct tcf_proto *goto_tp;
>> +               struct {
>> +                       const struct tcf_proto *goto_tp;
>> +                       u32 goto_index;
>> +               };
>>
>>                  /* used in the skb_tc_reinsert function */
>>                  struct {
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ea8e8d3..2b40b5a 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>   #ifdef CONFIG_XFRM
>>          [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>> +#endif
>>   };
>>
>>   static __always_inline unsigned int skb_ext_total_length(void)
>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>   #ifdef CONFIG_XFRM
>>                  skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +               skb_ext_type_len[TC_SKB_EXT] +
>> +#endif
>>                  0;
>>   }
>>
>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>> index bc89e16..0287ead 100644
>> --- a/net/openvswitch/flow.c
>> +++ b/net/openvswitch/flow.c
>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>                           struct sk_buff *skb, struct sw_flow_key *key)
>>   {
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       struct tc_skb_ext *tc_ext;
>> +#endif
>>          int res, err;
>>
>>          /* Extract metadata from packet. */
>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>          if (res < 0)
>>                  return res;
>>          key->mac_proto = res;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
>> +#else
>>          key->recirc_id = 0;
>> +#endif
>>
> Most of cases the config would be turned on, so the ifdef is not that
> useful. Can you add static key to avoid searching the skb-ext in non
> offload cases.

Hi,

What do you mean by a static key?


^ permalink raw reply

* Re: [PATCH net-next] net: openvswitch: Set OvS recirc_id from tc chain index
From: Paul Blakey @ 2019-08-11 10:48 UTC (permalink / raw)
  To: Pravin Shelar
  Cc: Linux Kernel Network Developers, David S. Miller, Justin Pettit,
	Simon Horman, Marcelo Ricardo Leitner, Vlad Buslov, Jiri Pirko,
	Roi Dayan, Yossi Kuperman, Rony Efraim, Oz Shlomo
In-Reply-To: <CAOrHB_DhfiQy8RwTiwgn9ZXgsd5j2f0ynZPUP4wf-xzhjwo8kg@mail.gmail.com>


On 8/8/2019 11:53 PM, Pravin Shelar wrote:
> On Wed, Aug 7, 2019 at 5:08 AM Paul Blakey <paulb@mellanox.com> wrote:
>> Offloaded OvS datapath rules are translated one to one to tc rules,
>> for example the following simplified OvS rule:
>>
>> recirc_id(0),in_port(dev1),eth_type(0x0800),ct_state(-trk) actions:ct(),recirc(2)
>>
>> Will be translated to the following tc rule:
>>
>> $ tc filter add dev dev1 ingress \
>>              prio 1 chain 0 proto ip \
>>                  flower tcp ct_state -trk \
>>                  action ct pipe \
>>                  action goto chain 2
>>
>> Received packets will first travel though tc, and if they aren't stolen
>> by it, like in the above rule, they will continue to OvS datapath.
>> Since we already did some actions (action ct in this case) which might
>> modify the packets, and updated action stats, we would like to continue
>> the proccessing with the correct recirc_id in OvS (here recirc_id(2))
>> where we left off.
>>
>> To support this, introduce a new skb extension for tc, which
>> will be used for translating tc chain to ovs recirc_id to
>> handle these miss cases. Last tc chain index will be set
>> by tc goto chain action and read by OvS datapath.
>>
>> Signed-off-by: Paul Blakey <paulb@mellanox.com>
>> Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
>> Acked-by: Jiri Pirko <jiri@mellanox.com>
>> ---
>>   include/linux/skbuff.h    | 13 +++++++++++++
>>   include/net/sch_generic.h |  5 ++++-
>>   net/core/skbuff.c         |  6 ++++++
>>   net/openvswitch/flow.c    |  9 +++++++++
>>   net/sched/Kconfig         | 13 +++++++++++++
>>   net/sched/act_api.c       |  1 +
>>   net/sched/cls_api.c       | 12 ++++++++++++
>>   7 files changed, 58 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
>> index 3aef8d8..fb2a792 100644
>> --- a/include/linux/skbuff.h
>> +++ b/include/linux/skbuff.h
>> @@ -279,6 +279,16 @@ struct nf_bridge_info {
>>   };
>>   #endif
>>
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +/* Chain in tc_skb_ext will be used to share the tc chain with
>> + * ovs recirc_id. It will be set to the current chain by tc
>> + * and read by ovs to recirc_id.
>> + */
>> +struct tc_skb_ext {
>> +       __u32 chain;
>> +};
>> +#endif
>> +
>>   struct sk_buff_head {
>>          /* These two members must be first. */
>>          struct sk_buff  *next;
>> @@ -4050,6 +4060,9 @@ enum skb_ext_id {
>>   #ifdef CONFIG_XFRM
>>          SKB_EXT_SEC_PATH,
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       TC_SKB_EXT,
>> +#endif
>>          SKB_EXT_NUM, /* must be last */
>>   };
>>
>> diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
>> index 6b6b012..871feea 100644
>> --- a/include/net/sch_generic.h
>> +++ b/include/net/sch_generic.h
>> @@ -275,7 +275,10 @@ struct tcf_result {
>>                          unsigned long   class;
>>                          u32             classid;
>>                  };
>> -               const struct tcf_proto *goto_tp;
>> +               struct {
>> +                       const struct tcf_proto *goto_tp;
>> +                       u32 goto_index;
>> +               };
>>
>>                  /* used in the skb_tc_reinsert function */
>>                  struct {
>> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
>> index ea8e8d3..2b40b5a 100644
>> --- a/net/core/skbuff.c
>> +++ b/net/core/skbuff.c
>> @@ -4087,6 +4087,9 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
>>   #ifdef CONFIG_XFRM
>>          [SKB_EXT_SEC_PATH] = SKB_EXT_CHUNKSIZEOF(struct sec_path),
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       [TC_SKB_EXT] = SKB_EXT_CHUNKSIZEOF(struct tc_skb_ext),
>> +#endif
>>   };
>>
>>   static __always_inline unsigned int skb_ext_total_length(void)
>> @@ -4098,6 +4101,9 @@ static __always_inline unsigned int skb_ext_total_length(void)
>>   #ifdef CONFIG_XFRM
>>                  skb_ext_type_len[SKB_EXT_SEC_PATH] +
>>   #endif
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +               skb_ext_type_len[TC_SKB_EXT] +
>> +#endif
>>                  0;
>>   }
>>
>> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
>> index bc89e16..0287ead 100644
>> --- a/net/openvswitch/flow.c
>> +++ b/net/openvswitch/flow.c
>> @@ -816,6 +816,9 @@ static int key_extract_mac_proto(struct sk_buff *skb)
>>   int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>                           struct sk_buff *skb, struct sw_flow_key *key)
>>   {
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       struct tc_skb_ext *tc_ext;
>> +#endif
>>          int res, err;
>>
>>          /* Extract metadata from packet. */
>> @@ -848,7 +851,13 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
>>          if (res < 0)
>>                  return res;
>>          key->mac_proto = res;
>> +
>> +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
>> +       tc_ext = skb_ext_find(skb, TC_SKB_EXT);
>> +       key->recirc_id = tc_ext ? tc_ext->chain : 0;
>> +#else
>>          key->recirc_id = 0;
>> +#endif
>>
> Most of cases the config would be turned on, so the ifdef is not that
> useful. Can you add static key to avoid searching the skb-ext in non
> offload cases.


Also regarding the ifdefs, yeah , I need them unless I always define the 
enum, which other extension don't do.



^ permalink raw reply

* Re: [PATCH] xen-netback: no need to check return value of debugfs_create functions
From: Wei Liu @ 2019-08-11 10:49 UTC (permalink / raw)
  To: Greg Kroah-Hartman; +Cc: Wei Liu, Paul Durrant, xen-devel, netdev
In-Reply-To: <20190810103108.GA29487@kroah.com>

On Sat, Aug 10, 2019 at 12:31:08PM +0200, Greg Kroah-Hartman wrote:
> When calling debugfs functions, there is no need to ever check the
> return value.  The function can work or not, but the code logic should
> never do something different based on this.
> 
> Cc: Wei Liu <wei.liu@kernel.org>
> Cc: Paul Durrant <paul.durrant@citrix.com>
> Cc: xen-devel@lists.xenproject.org
> Cc: netdev@vger.kernel.org
> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Acked-by: Wei Liu <wei.liu@kernel.org>

^ permalink raw reply

* Re: [PATCH] Documentation/networking/af_xdp: Inhibit reference to struct socket
From: Jonathan Neuschäfer @ 2019-08-11 11:32 UTC (permalink / raw)
  To: Jonathan Corbet
  Cc: Jonathan Neuschäfer, linux-doc, David S. Miller,
	Alexei Starovoitov, Daniel Borkmann, Jakub Kicinski,
	Jesper Dangaard Brouer, John Fastabend, netdev, xdp-newbies, bpf,
	linux-kernel
In-Reply-To: <20190810085821.11cee8b0@lwn.net>

[-- Attachment #1: Type: text/plain, Size: 971 bytes --]

On Sat, Aug 10, 2019 at 08:58:21AM -0600, Jonathan Corbet wrote:
> On Sat, 10 Aug 2019 14:17:37 +0200
> Jonathan Neuschäfer <j.neuschaefer@gmx.net> wrote:
> 
> > With the recent change to auto-detect function names, Sphinx parses
> > socket() as a reference to the in-kernel definition of socket. It then
> > decides that struct socket is a good match, which was obviously not
> > intended in this case, because the text speaks about the syscall with
> > the same name.
> > 
> > Prevent socket() from being misinterpreted by wrapping it in ``inline
> > literal`` quotes.
> > 
> > Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
> 
> Thanks for looking at that.  The better fix, though, would be to add
> socket() to the Skipfuncs array in Documentation/sphinx/automarkup.py.
> Then it will do the right thing everywhere without the need to add markup
> to the RST files.

Alright, I'll do that for v2.


Thanks,
Jonathan Neuschäfer

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply

* Re: [PATCH net-next v2 1/1] net: dsa: fix fixed-link port registration
From: Heiner Kallweit @ 2019-08-11 11:35 UTC (permalink / raw)
  To: Marek Behún
  Cc: Andrew Lunn, netdev, Sebastian Reichel, Vivien Didelot,
	Florian Fainelli, David S . Miller
In-Reply-To: <20190811033910.GL30120@lunn.ch>

On 11.08.2019 05:39, Andrew Lunn wrote:
> On Sun, Aug 11, 2019 at 05:18:57AM +0200, Marek Behún wrote:
>> Commit 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in
>> genphy_read_status") broke fixed link DSA port registration in
>> dsa_port_fixed_link_register_of: the genphy_read_status does not do what
>> it is supposed to and the following adjust_link is given wrong
>> parameters.
> 
> Hi Marek
> 
> Which parameters are incorrect?
> 
> In fixed_phy.c, __fixed_phy_register() there is:
> 
>         /* propagate the fixed link values to struct phy_device */
>         phy->link = status->link;
>         if (status->link) {
>                 phy->speed = status->speed;
>                 phy->duplex = status->duplex;
>                 phy->pause = status->pause;
>                 phy->asym_pause = status->asym_pause;
>         }
> 
> Are we not initialising something? Or is the initialisation done here
> getting reset sometime afterwards?
> 
In addition to Andrew's question:
We talk about this DT config: armada-385-turris-omnia.dts ?
Which kernel version are you using?

> Thanks
> 	Andrew
> 
Heiner

^ permalink raw reply

* [BUG] fec mdio times out under system stress
From: Russell King - ARM Linux admin @ 2019-08-11 13:37 UTC (permalink / raw)
  To: linux-arm-kernel, Fabio Estevam, netdev, Andrew Lunn,
	Florian Fainelli, Heiner Kallweit

Hi Fabio,

When I woke up this morning, I found that one of the Hummingboards
had gone offline (as in, lost network link) during the night.
Investigating, I find that the system had gone into OOM, and at
that time, triggered an unrelated:

[4111697.698776] fec 2188000.ethernet eth0: MDIO read timeout
[4111697.712996] MII_DATA: 0x6006796d
[4111697.729415] MII_SPEED: 0x0000001a
[4111697.745232] IEVENT: 0x00000000
[4111697.745242] IMASK: 0x0a8000aa
[4111698.002233] Atheros 8035 ethernet 2188000.ethernet-1:00: PHY state change RUNNING -> HALTED
[4111698.009882] fec 2188000.ethernet eth0: Link is Down

This is on a dual-core iMX6.

It looks like the read actually completed (since MII_DATA contains
the register data) but we somehow lost the interrupt (or maybe
received the interrupt after wait_for_completion_timeout() timed
out.)

From what I can see, the OOM events happened on CPU1, CPU1 was
allocated the FEC interrupt, and the PHY polling that suffered the
MDIO timeout was on CPU0.

Given that IEVENT is zero, it seems that CPU1 had read serviced the
interrupt, but it is not clear how far through processing that it
was - it may be that fec_enet_interrupt() had been delayed by the
OOM condition.

This seems rather fragile - as the system slowing down due to OOM
triggers the network to completely collapse by phylib taking the
PHY offline, making the system inaccessible except through the
console.

In my case, even serial console wasn't operational (except for
magic sysrq).  Not sure what agetty was playing at... so the only
way I could recover any information from the system was to connect
the HDMI and plug in a USB keyboard.

Any thoughts on how FEC MDIO accesses could be made more robust?

Maybe phylib should retry a number of times - but with read-sensitive
registers, if the read has already completed successfully, and its
just a problem with the FEC MDIO hardware, that could cause issues.

Thanks.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply

* Re: [PATCH net-next v2 1/1] net: dsa: fix fixed-link port registration
From: Marek Behun @ 2019-08-11 14:04 UTC (permalink / raw)
  To: Heiner Kallweit
  Cc: Andrew Lunn, netdev, Sebastian Reichel, Vivien Didelot,
	Florian Fainelli, David S . Miller
In-Reply-To: <91cd70df-c856-4c7e-7ebb-c01519fb13d2@gmail.com>

OK guys, something is terribly wrong here.

I bisected to the commit mentioned (88d6272acaaa), looked around at the
genphy functions, tried adding the link=0 workaround and it did work,
so I though this was the issue.

What I realized now is that before the commit 88d6272acaaa things
worked because of two bugs, which negated each other. This commit caused
one of this bugs not to fire, and thus the second bug was not negated.

What actually happened before the commit that broke it is this:
  - after the fixed_phy is created, the parameters are corrent
  - genphy_read_status breaks the parameters:
     - first it sets the parameters to unknown (SPEED_UNKNOWN,
       DUPLEX_UNKNOWN)
     - then read the registers, which are simulated for fixed_phy
     - then it uses phy-core.c:phy_resolve_aneg_linkmode function, which
       looks for correct settings by bit-anding the ->advertising and
       ->lp_advertigins bit arrays. But in fixed_phy, ->lp_advertising
       is set to zero, so the parameters are left at SPEED_UNKNOWN, ...
       (this is the first bug)
  - then adjust_link is called, which then goes to
    mv88e6xxx_port_setup_mac, where there is a test if it should change
    something:
       if (state.link == link && state.speed == speed &&
           state.duplex == duplex)
               return 0;
  - since current speed on the switch port (state.speed) is SPEED_1000,
    and new speed is SPEED_UNKNOWN, this test fails, and so the rest of
    this function is called, which makes the port work
    (the if test is the second bug)

After the commit that broke things:
  - after the fixed_phy is created, the parameters are corrent
  - genphy_read_status doesn't change them
  - mv88e6xxx_port_setup_mac does nothing, since the if condition above
    is true

So, there are two things that are broken:
 - the test in mv88e6xxx_port_setup_mac whether there is to be a change
   should be more sophisticated
 - fixed_phy should also simulate the lp_advertising register

What do you think of this?

Marek

On Sun, 11 Aug 2019 13:35:20 +0200
Heiner Kallweit <hkallweit1@gmail.com> wrote:

> On 11.08.2019 05:39, Andrew Lunn wrote:
> > On Sun, Aug 11, 2019 at 05:18:57AM +0200, Marek Behún wrote:  
> >> Commit 88d6272acaaa ("net: phy: avoid unneeded MDIO reads in
> >> genphy_read_status") broke fixed link DSA port registration in
> >> dsa_port_fixed_link_register_of: the genphy_read_status does not do what
> >> it is supposed to and the following adjust_link is given wrong
> >> parameters.  
> > 
> > Hi Marek
> > 
> > Which parameters are incorrect?
> > 
> > In fixed_phy.c, __fixed_phy_register() there is:
> > 
> >         /* propagate the fixed link values to struct phy_device */
> >         phy->link = status->link;
> >         if (status->link) {
> >                 phy->speed = status->speed;
> >                 phy->duplex = status->duplex;
> >                 phy->pause = status->pause;
> >                 phy->asym_pause = status->asym_pause;
> >         }
> > 
> > Are we not initialising something? Or is the initialisation done here
> > getting reset sometime afterwards?
> >   
> In addition to Andrew's question:
> We talk about this DT config: armada-385-turris-omnia.dts ?
> Which kernel version are you using?
> 
> > Thanks
> > 	Andrew
> >   
> Heiner


^ permalink raw reply

* Re: [BUG] fec mdio times out under system stress
From: Russell King - ARM Linux admin @ 2019-08-11 14:06 UTC (permalink / raw)
  To: linux-arm-kernel, Fabio Estevam, netdev, Andrew Lunn,
	Florian Fainelli, Heiner Kallweit
In-Reply-To: <20190811133707.GC13294@shell.armlinux.org.uk>

On Sun, Aug 11, 2019 at 02:37:07PM +0100, Russell King - ARM Linux admin wrote:
> Hi Fabio,
> 
> When I woke up this morning, I found that one of the Hummingboards
> had gone offline (as in, lost network link) during the night.
> Investigating, I find that the system had gone into OOM, and at
> that time, triggered an unrelated:
> 
> [4111697.698776] fec 2188000.ethernet eth0: MDIO read timeout
> [4111697.712996] MII_DATA: 0x6006796d
> [4111697.729415] MII_SPEED: 0x0000001a
> [4111697.745232] IEVENT: 0x00000000
> [4111697.745242] IMASK: 0x0a8000aa
> [4111698.002233] Atheros 8035 ethernet 2188000.ethernet-1:00: PHY state change RUNNING -> HALTED
> [4111698.009882] fec 2188000.ethernet eth0: Link is Down
> 
> This is on a dual-core iMX6.
> 
> It looks like the read actually completed (since MII_DATA contains
> the register data) but we somehow lost the interrupt (or maybe
> received the interrupt after wait_for_completion_timeout() timed
> out.)
> 
> From what I can see, the OOM events happened on CPU1, CPU1 was
> allocated the FEC interrupt, and the PHY polling that suffered the
> MDIO timeout was on CPU0.
> 
> Given that IEVENT is zero, it seems that CPU1 had read serviced the
> interrupt, but it is not clear how far through processing that it
> was - it may be that fec_enet_interrupt() had been delayed by the
> OOM condition.
> 
> This seems rather fragile - as the system slowing down due to OOM
> triggers the network to completely collapse by phylib taking the
> PHY offline, making the system inaccessible except through the
> console.
> 
> In my case, even serial console wasn't operational (except for
> magic sysrq).  Not sure what agetty was playing at... so the only
> way I could recover any information from the system was to connect
> the HDMI and plug in a USB keyboard.
> 
> Any thoughts on how FEC MDIO accesses could be made more robust?
> 
> Maybe phylib should retry a number of times - but with read-sensitive
> registers, if the read has already completed successfully, and its
> just a problem with the FEC MDIO hardware, that could cause issues.

I should also note for the phylib people:

After phylib has received an error, and has entered the HALTED state,
downing the interface produces this warning, which seems rather unfair
as the FEC doesn't know that its PHY suffered an error - it is merely
reversing the phy_start() that happened when the interface was opened.

[4144039.099786] ------------[ cut here ]------------
[4144039.109001] WARNING: CPU: 1 PID: 25842 at drivers/net/phy/phy.c:835 fec_enet_close+0x14/0x148
[4144039.124366] called from state HALTED
[4144039.132626] Modules linked in: 8021q brcmfmac brcmutil imx_thermal
snd_soc_imx_spdif snd_soc_imx_audmux nvmem_imx_ocotp cfg80211
snd_soc_sgtl5000 imx_sdma
virt_dma rc_cec snd_soc_fsl_ssi snd_soc_fsl_spdif coda imx_pcm_dma
v4l2_mem2mem
imx_vdoa etnaviv videobuf2_dma_contig gpu_sched dw_hdmi_cec
dw_hdmi_ahb_audio imx6q_cpufreq caamrng caam_jr caam error ip_tables
x_tables [last unloaded: evbug][4144039.177249] CPU: 1 PID: 25842 Comm:
ip Tainted: G        W         5.1.0+ #319
[4144039.189477] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
[4144039.201025] [<c00194f0>] (unwind_backtrace) from [<c0014748>] (show_stack+0x10/0x14)
[4144039.213769] [<c0014748>] (show_stack) from [<c082e11c>] (dump_stack+0x9c/0xd4)
[4144039.225959] [<c082e11c>] (dump_stack) from [<c0030f3c>] (__warn+0xf8/0x124)
[4144039.237828] [<c0030f3c>] (__warn) from [<c0031030>] (warn_slowpath_fmt+0x38/0x48)
[4144039.250275] [<c0031030>] (warn_slowpath_fmt) from [<c052cb14>] (fec_enet_close+0x14/0x148)
[4144039.263558] [<c052cb14>] (fec_enet_close) from [<c066b5bc>] (__dev_close_many+0x88/0xf0)
[4144039.276639] [<c066b5bc>] (__dev_close_many) from [<c06740cc>] (__dev_change_flags+0xa4/0x1a0)
[4144039.290130] [<c06740cc>] (__dev_change_flags) from [<c06741e8>] (dev_change_flags+0x18/0x48)
[4144039.303558] [<c06741e8>] (dev_change_flags) from [<c068db80>] (do_setlink+0x29c/0x990)
[4144039.316517] [<c068db80>] (do_setlink) from [<c068eae8>] (__rtnl_newlink+0x4a4/0x72c)
[4144039.329282] [<c068eae8>] (__rtnl_newlink) from [<c068edb0>] (rtnl_newlink+0x40/0x60)
[4144039.342023] [<c068edb0>] (rtnl_newlink) from [<c0689a10>] (rtnetlink_rcv_msg+0x244/0x470)
[4144039.355224] [<c0689a10>] (rtnetlink_rcv_msg) from [<c06d20a0>] (netlink_rcv_skb+0xe0/0xf4)
[4144039.368492] [<c06d20a0>] (netlink_rcv_skb) from [<c06d194c>] (netlink_unicast+0x170/0x1b8)
[4144039.381758] [<c06d194c>] (netlink_unicast) from [<c06d1cdc>] (netlink_sendmsg+0x2b8/0x340)
[4144039.395023] [<c06d1cdc>] (netlink_sendmsg) from [<c064999c>] (sock_sendmsg+0x14/0x24)
[4144039.407825] [<c064999c>] (sock_sendmsg) from [<c064a5a0>] (___sys_sendmsg+0x200/0x214)
[4144039.420714] [<c064a5a0>] (___sys_sendmsg) from [<c064b0c4>] (__sys_sendmsg+0x40/0x6c)
[4144039.433506] [<c064b0c4>] (__sys_sendmsg) from [<c0009000>] (ret_fast_syscall+0x0/0x28)
[4144039.446366] Exception stack(0xe1933fa8 to 0xe1933ff0)
[4144039.456329] 3fa0:                   bea086f0 bea086bc 00000003 bea086d0 00000000 00000000
[4144039.469464] 3fc0: bea086f0 bea086bc 00000000 00000128 0062a278 bea086d0 5d5011f3 0062a000
[4144039.482590] 3fe0: 00000128 bea08680 b6dfc4cb b6d7d6f6
[4144039.492652] irq event stamp: 0
[4144039.500650] hardirqs last  enabled at (0): [<00000000>]   (null)
[4144039.511628] hardirqs last disabled at (0): [<c002e55c>] copy_process.part.4+0x30c/0x19e8
[4144039.524691] softirqs last  enabled at (0): [<c002e55c>] copy_process.part.4+0x30c/0x19e8
[4144039.537691] softirqs last disabled at (0): [<00000000>]   (null)
[4144039.548554] ---[ end trace b5e8d4b0f30ae00b ]---


-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
According to speedtest.net: 11.9Mbps down 500kbps up

^ permalink raw reply

* [PATCH net v3] net: dsa: Check existence of .port_mdb_add callback before calling it
From: Chen-Yu Tsai @ 2019-08-11 14:18 UTC (permalink / raw)
  To: Andrew Lunn, Vivien Didelot, Florian Fainelli, David S. Miller
  Cc: Chen-Yu Tsai, netdev, linux-kernel

From: Chen-Yu Tsai <wens@csie.org>

The dsa framework has optional .port_mdb_{prepare,add,del} callback fields
for drivers to handle multicast database entries. When adding an entry, the
framework goes through a prepare phase, then a commit phase. Drivers not
providing these callbacks should be detected in the prepare phase.

DSA core may still bypass the bridge layer and call the dsa_port_mdb_add
function directly with no prepare phase or no switchdev trans object,
and the framework ends up calling an undefined .port_mdb_add callback.
This results in a NULL pointer dereference, as shown in the log below.

The other functions seem to be properly guarded. Do the same for
.port_mdb_add in dsa_switch_mdb_add_bitmap() as well.

    8<--- cut here ---
    Unable to handle kernel NULL pointer dereference at virtual address 00000000
    pgd = (ptrval)
    [00000000] *pgd=00000000
    Internal error: Oops: 80000005 [#1] SMP ARM
    Modules linked in: rtl8xxxu rtl8192cu rtl_usb rtl8192c_common rtlwifi mac80211 cfg80211
    CPU: 1 PID: 134 Comm: kworker/1:2 Not tainted 5.3.0-rc1-00247-gd3519030752a #1
    Hardware name: Allwinner sun7i (A20) Family
    Workqueue: events switchdev_deferred_process_work
    PC is at 0x0
    LR is at dsa_switch_event+0x570/0x620
    pc : [<00000000>]    lr : [<c08533ec>]    psr: 80070013
    sp : ee871db8  ip : 00000000  fp : ee98d0a4
    r10: 0000000c  r9 : 00000008  r8 : ee89f710
    r7 : ee98d040  r6 : ee98d088  r5 : c0f04c48  r4 : ee98d04c
    r3 : 00000000  r2 : ee89f710  r1 : 00000008  r0 : ee98d040
    Flags: Nzcv  IRQs on  FIQs on  Mode SVC_32  ISA ARM  Segment none
    Control: 10c5387d  Table: 6deb406a  DAC: 00000051
    Process kworker/1:2 (pid: 134, stack limit = 0x(ptrval))
    Stack: (0xee871db8 to 0xee872000)
    1da0:                                                       ee871e14 103ace2d
    1dc0: 00000000 ffffffff 00000000 ee871e14 00000005 00000000 c08524a0 00000000
    1de0: ffffe000 c014bdfc c0f04c48 ee871e98 c0f04c48 ee9e5000 c0851120 c014bef0
    1e00: 00000000 b643aea2 ee9b4068 c08509a8 ee2bf940 ee89f710 ee871ecb 00000000
    1e20: 00000008 103ace2d 00000000 c087e248 ee29c868 103ace2d 00000001 ffffffff
    1e40: 00000000 ee871e98 00000006 00000000 c0fb2a50 c087e2d0 ffffffff c08523c4
    1e60: ffffffff c014bdfc 00000006 c0fad2d0 ee871e98 ee89f710 00000000 c014c500
    1e80: 00000000 ee89f3c0 c0f04c48 00000000 ee9e5000 c087dfb4 ee9e5000 00000000
    1ea0: ee89f710 ee871ecb 00000001 103ace2d 00000000 c0f04c48 00000000 c087e0a8
    1ec0: 00000000 efd9a3e0 0089f3c0 103ace2d ee89f700 ee89f710 ee9e5000 00000122
    1ee0: 00000100 c087e130 ee89f700 c0fad2c8 c1003ef0 c087de4c 2e928000 c0fad2ec
    1f00: c0fad2ec ee839580 ef7a62c0 ef7a9400 00000000 c087def8 c0fad2ec c01447dc
    1f20: ef315640 ef7a62c0 00000008 ee839580 ee839594 ef7a62c0 00000008 c0f03d00
    1f40: ef7a62d8 ef7a62c0 ffffe000 c0145b84 ffffe000 c0fb2420 c0bfaa8c 00000000
    1f60: ffffe000 ee84b600 ee84b5c0 00000000 ee870000 ee839580 c0145b40 ef0e5ea4
    1f80: ee84b61c c014a6f8 00000001 ee84b5c0 c014a5b0 00000000 00000000 00000000
    1fa0: 00000000 00000000 00000000 c01010e8 00000000 00000000 00000000 00000000
    1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
    1fe0: 00000000 00000000 00000000 00000000 00000013 00000000 00000000 00000000
    [<c08533ec>] (dsa_switch_event) from [<c014bdfc>] (notifier_call_chain+0x48/0x84)
    [<c014bdfc>] (notifier_call_chain) from [<c014bef0>] (raw_notifier_call_chain+0x18/0x20)
    [<c014bef0>] (raw_notifier_call_chain) from [<c08509a8>] (dsa_port_mdb_add+0x48/0x74)
    [<c08509a8>] (dsa_port_mdb_add) from [<c087e248>] (__switchdev_handle_port_obj_add+0x54/0xd4)
    [<c087e248>] (__switchdev_handle_port_obj_add) from [<c087e2d0>] (switchdev_handle_port_obj_add+0x8/0x14)
    [<c087e2d0>] (switchdev_handle_port_obj_add) from [<c08523c4>] (dsa_slave_switchdev_blocking_event+0x94/0xa4)
    [<c08523c4>] (dsa_slave_switchdev_blocking_event) from [<c014bdfc>] (notifier_call_chain+0x48/0x84)
    [<c014bdfc>] (notifier_call_chain) from [<c014c500>] (blocking_notifier_call_chain+0x50/0x68)
    [<c014c500>] (blocking_notifier_call_chain) from [<c087dfb4>] (switchdev_port_obj_notify+0x44/0xa8)
    [<c087dfb4>] (switchdev_port_obj_notify) from [<c087e0a8>] (switchdev_port_obj_add_now+0x90/0x104)
    [<c087e0a8>] (switchdev_port_obj_add_now) from [<c087e130>] (switchdev_port_obj_add_deferred+0x14/0x5c)
    [<c087e130>] (switchdev_port_obj_add_deferred) from [<c087de4c>] (switchdev_deferred_process+0x64/0x104)
    [<c087de4c>] (switchdev_deferred_process) from [<c087def8>] (switchdev_deferred_process_work+0xc/0x14)
    [<c087def8>] (switchdev_deferred_process_work) from [<c01447dc>] (process_one_work+0x218/0x50c)
    [<c01447dc>] (process_one_work) from [<c0145b84>] (worker_thread+0x44/0x5bc)
    [<c0145b84>] (worker_thread) from [<c014a6f8>] (kthread+0x148/0x150)
    [<c014a6f8>] (kthread) from [<c01010e8>] (ret_from_fork+0x14/0x2c)
    Exception stack(0xee871fb0 to 0xee871ff8)
    1fa0:                                     00000000 00000000 00000000 00000000
    1fc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000
    1fe0: 00000000 00000000 00000000 00000000 00000013 00000000
    Code: bad PC value
    ---[ end trace 1292c61abd17b130 ]---

    [<c08533ec>] (dsa_switch_event) from [<c014bdfc>] (notifier_call_chain+0x48/0x84)
    corresponds to

	$ arm-linux-gnueabihf-addr2line -C -i -e vmlinux c08533ec

	linux/net/dsa/switch.c:156
	linux/net/dsa/switch.c:178
	linux/net/dsa/switch.c:328

Fixes: e6db98db8a95 ("net: dsa: add switch mdb bitmap functions")
Signed-off-by: Chen-Yu Tsai <wens@csie.org>
---
Changes since v2:

  - Moved the check back to dsa_switch_mdb_add_bitmap, but don't change
    the function return type this time
  - Rewrote the commit log

Changes since v1:

  - Moved the check to the beginning of dsa_switch_mdb_add()

---
 net/dsa/switch.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index 4ec5b7f85d51..09d9286b27cc 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -153,6 +153,9 @@ static void dsa_switch_mdb_add_bitmap(struct dsa_switch *ds,
 {
 	int port;
 
+	if (!ds->ops->port_mdb_add)
+		return;
+
 	for_each_set_bit(port, bitmap, ds->num_ports)
 		ds->ops->port_mdb_add(ds, port, mdb);
 }
-- 
2.20.1


^ permalink raw reply related

* Re: [BUG] fec mdio times out under system stress
From: Vladimir Oltean @ 2019-08-11 14:54 UTC (permalink / raw)
  To: Russell King - ARM Linux admin
  Cc: linux-arm-kernel, Fabio Estevam, netdev, Andrew Lunn,
	Florian Fainelli, Heiner Kallweit, Hubert Feurstein
In-Reply-To: <20190811133707.GC13294@shell.armlinux.org.uk>

Hi Russell, Fabio,

On Sun, 11 Aug 2019 at 16:42, Russell King - ARM Linux admin
<linux@armlinux.org.uk> wrote:
>
> Hi Fabio,
>
> When I woke up this morning, I found that one of the Hummingboards
> had gone offline (as in, lost network link) during the night.
> Investigating, I find that the system had gone into OOM, and at
> that time, triggered an unrelated:
>
> [4111697.698776] fec 2188000.ethernet eth0: MDIO read timeout
> [4111697.712996] MII_DATA: 0x6006796d
> [4111697.729415] MII_SPEED: 0x0000001a
> [4111697.745232] IEVENT: 0x00000000
> [4111697.745242] IMASK: 0x0a8000aa
> [4111698.002233] Atheros 8035 ethernet 2188000.ethernet-1:00: PHY state change RUNNING -> HALTED
> [4111698.009882] fec 2188000.ethernet eth0: Link is Down
>
> This is on a dual-core iMX6.
>
> It looks like the read actually completed (since MII_DATA contains
> the register data) but we somehow lost the interrupt (or maybe
> received the interrupt after wait_for_completion_timeout() timed
> out.)
>
> From what I can see, the OOM events happened on CPU1, CPU1 was
> allocated the FEC interrupt, and the PHY polling that suffered the
> MDIO timeout was on CPU0.
>
> Given that IEVENT is zero, it seems that CPU1 had read serviced the
> interrupt, but it is not clear how far through processing that it
> was - it may be that fec_enet_interrupt() had been delayed by the
> OOM condition.
>
> This seems rather fragile - as the system slowing down due to OOM
> triggers the network to completely collapse by phylib taking the
> PHY offline, making the system inaccessible except through the
> console.
>
> In my case, even serial console wasn't operational (except for
> magic sysrq).  Not sure what agetty was playing at... so the only
> way I could recover any information from the system was to connect
> the HDMI and plug in a USB keyboard.
>
> Any thoughts on how FEC MDIO accesses could be made more robust?
>

I think a better question is why is the FEC MDIO controller configured
to emit interrupts anyway (especially since the API built on top does
not benefit in any way from this)? Hubert (copied) sent an interesting
email very recently where he pointed out that this is one of the main
sources of jitter when reading the PTP clock on a Marvell switch
attached over MDIO.

> Maybe phylib should retry a number of times - but with read-sensitive
> registers, if the read has already completed successfully, and its
> just a problem with the FEC MDIO hardware, that could cause issues.
>
> Thanks.
>
> --
> RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
> FTTC broadband for 0.8mile line in suburbia: sync at 12.1Mbps down 622kbps up
> According to speedtest.net: 11.9Mbps down 500kbps up

Regards,
-Vladimir

^ permalink raw reply

* [PATCH net-next 1/2] net: dsa: mv88e6xxx: fix RGMII-ID port setup
From: Marek Behún @ 2019-08-11 15:08 UTC (permalink / raw)
  To: netdev
  Cc: Marek Behún, Heiner Kallweit, Sebastian Reichel,
	Vivien Didelot, Andrew Lunn, Florian Fainelli, David S . Miller

The mv88e6xxx_port_setup_mac looks if one of the {link, speed, duplex}
parameters is being changed from the current setting, and if not, does
not do anything. This test is wrong in some situations: this method also
has the mode argument, which can also be changed.

For example on Turris Omnia, the mode is PHY_INTERFACE_MODE_RGMII_ID,
which has to be set byt the ->port_set_rgmii_delay method. The test does
not look if mode is being changed (in fact there is currently no method
to determine port mode as phy_interface_t type).

The simplest solution seems to be to drop this test altogether and
simply do the setup when requested.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 2e8b1ab2c6f7..aae63f6515b3 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -420,15 +420,6 @@ int mv88e6xxx_port_setup_mac(struct mv88e6xxx_chip *chip, int port, int link,
 	if (err)
 		return err;
 
-	/* Has anything actually changed? We don't expect the
-	 * interface mode to change without one of the other
-	 * parameters also changing
-	 */
-	if (state.link == link &&
-	    state.speed == speed &&
-	    state.duplex == duplex)
-		return 0;
-
 	/* Port's MAC control must not be changed unless the link is down */
 	err = chip->info->ops->port_set_link(chip, port, 0);
 	if (err)
-- 
2.21.0


^ permalink raw reply related

* [PATCH net-next 2/2] net: fixed_phy: set is_gigabit_capable member when needed
From: Marek Behún @ 2019-08-11 15:08 UTC (permalink / raw)
  To: netdev
  Cc: Marek Behún, Heiner Kallweit, Sebastian Reichel,
	Vivien Didelot, Andrew Lunn, Florian Fainelli, David S . Miller
In-Reply-To: <20190811150812.6780-1-marek.behun@nic.cz>

The fixed_phy driver does not set the phydev->is_gigabit_capable member
when the fixed_phy is gigabit capable. This in turn causes
phy_device.c:genphy_read_status function to return unknown phy
parameters (SPEED_UNKNOWN, DUPLEX_UNKNOWN, ...), if the fixed_phy is
created with SPEED_1000.

Signed-off-by: Marek Behún <marek.behun@nic.cz>
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Cc: Sebastian Reichel <sebastian.reichel@collabora.co.uk>
Cc: Vivien Didelot <vivien.didelot@gmail.com>
Cc: Andrew Lunn <andrew@lunn.ch>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/fixed_phy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c
index 3ffe46df249e..424b02ad7b7b 100644
--- a/drivers/net/phy/fixed_phy.c
+++ b/drivers/net/phy/fixed_phy.c
@@ -286,6 +286,7 @@ static struct phy_device *__fixed_phy_register(unsigned int irq,
 				 phy->supported);
 		linkmode_set_bit(ETHTOOL_LINK_MODE_1000baseT_Full_BIT,
 				 phy->supported);
+		phy->is_gigabit_capable = 1;
 		/* fall through */
 	case SPEED_100:
 		linkmode_set_bit(ETHTOOL_LINK_MODE_100baseT_Half_BIT,
-- 
2.21.0


^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox