Netdev List
 help / color / mirror / Atom feed
* [PATCH/RFC flow-net-next 07/10] net: flow: Add importance to flows
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

This is in preparation for adding support for eviction of flows
from tables when resource contention occurs. The importance of
a flow may be used to influence the eviction algorithm.

Inspired by the eviction feature of OpenFlow.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only
---
 include/uapi/linux/if_flow.h | 14 ++++++++++++++
 net/core/flow_table.c        | 12 ++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index d1643f3..91fcfb4 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -126,6 +126,7 @@
  *           [NET_FLOW_ATTR_TABLE]
  *	     [NET_FLOW_ATTR_UID]
  *	     [NET_FLOW_ATTR_PRIORITY]
+ *	     [NET_FLOW_ATTR_IMPORTANCE]
  *	     [NET_FLOW_ATTR_IDLE_TIMEOUT]
  *	     [NET_FLOW_ATTR_HARD_TIMEOUT]
  *	     [NET_FLOW_ATTR_BYTE_COUNT]
@@ -155,6 +156,7 @@
  *     [NET_FLOW_ATTR_TABLE]
  *     [NET_FLOW_ATTR_UID]
  *     [NET_FLOW_ATTR_PRIORITY]
+ *     [NET_FLOW_ATTR_IMPORTANCE]
  *     [NET_FLOW_ATTR_IDLE_TIMEOUT]
  *     [NET_FLOW_ATTR_HARD_TIMEOUT]
  *     [NET_FLOW_ATTR_BYTE_COUNT]
@@ -425,6 +427,11 @@ enum {
  *
  * @uid unique identifier for flow
  * @priority priority to execute flow match/action in table
+ * @importance importance of flow used to influence flow eviction algorithm
+ *             If eviction is enabled and uses importance then
+ *             flows with lower importance values must be evicted
+ *             before those with higher importance values.
+ *             The values 0xffff ff00 - 0xffff ffff are reserved for future use.
  * @match null terminated set of match uids match criteria
  * @action null terminated set of action uids to apply to match
  * @idle_timeout idle timeout of flow in seconds. Zero for no timeout.
@@ -442,6 +449,7 @@ struct net_flow_flow {
 	int table_id;
 	int uid;
 	int priority;
+	__u32 importance;
 	__u32 idle_timeout;
 	__u32 hard_timeout;
 	__u32 flow_rem;
@@ -484,6 +492,7 @@ enum {
 	NET_FLOW_ATTR_TABLE,
 	NET_FLOW_ATTR_UID,
 	NET_FLOW_ATTR_PRIORITY,
+	NET_FLOW_ATTR_IMPORTANCE,
 	NET_FLOW_ATTR_MATCHES,
 	NET_FLOW_ATTR_ACTIONS,
 	NET_FLOW_ATTR_IDLE_TIMEOUT,
@@ -496,6 +505,8 @@ enum {
 };
 #define NET_FLOW_ATTR_MAX (__NET_FLOW_ATTR_MAX - 1)
 
+#define NET_FLOW_ATTR_IMPORTANCE_MAX (0xfffffff00 - 1)
+
 /**
  * @struct net_flow_table
  * @brief define flow table with supported match/actions
@@ -557,6 +568,9 @@ enum {
 
 	/* Table supports last used counter for flows */
 	NET_FLOW_TABLE_F_PACKET_LAST_USED	= (1 << 4),
+
+	/* Table supports importance of flows */
+	NET_FLOW_TABLE_F_IMPORTANCE		= (1 << 5),
 };
 
 #if 0
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index 10b113f..0bf399c 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -53,6 +53,7 @@ struct nla_policy net_flow_flow_policy[NET_FLOW_ATTR_MAX + 1] = {
 	[NET_FLOW_ATTR_TABLE]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_UID]		= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_PRIORITY]	= { .type = NLA_U32 },
+	[NET_FLOW_ATTR_IMPORTANCE]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_IDLE_TIMEOUT]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_HARD_TIMEOUT]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_BYTE_COUNT]	= { .type = NLA_U64 },
@@ -206,6 +207,11 @@ int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow)
 	    nla_put_u32(skb, NET_FLOW_ATTR_PRIORITY, flow->priority))
 		goto flows_put_failure;
 
+	if (flow->importance &&
+	    (nla_put_u32(skb, NET_FLOW_ATTR_IMPORTANCE, flow->importance) ||
+	     flow->importance > NET_FLOW_ATTR_IMPORTANCE_MAX))
+		goto flows_put_failure;
+
 	if (flow->idle_timeout &&
 	    nla_put_u32(skb, NET_FLOW_ATTR_IDLE_TIMEOUT, flow->idle_timeout))
 		goto flows_put_failure;
@@ -556,6 +562,9 @@ static int net_flow_get_flow(struct net_flow_flow *flow, struct nlattr *attr)
 	flow->uid = nla_get_u32(f[NET_FLOW_ATTR_UID]);
 	flow->priority = nla_get_u32(f[NET_FLOW_ATTR_PRIORITY]);
 
+	if (f[NET_FLOW_ATTR_IMPORTANCE])
+		flow->importance = nla_get_u32(f[NET_FLOW_ATTR_IMPORTANCE]);
+
 	if (f[NET_FLOW_ATTR_IDLE_TIMEOUT])
 		flow->idle_timeout = nla_get_u32(f[NET_FLOW_ATTR_IDLE_TIMEOUT]);
 	if (f[NET_FLOW_ATTR_HARD_TIMEOUT])
@@ -1423,6 +1432,9 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 		if (err)
 			goto out;
 
+		if (this.importance)
+			used_features |= NET_FLOW_TABLE_F_IMPORTANCE;
+
 		if (this.idle_timeout)
 			used_features |= NET_FLOW_TABLE_F_IDLE_TIMEOUT;
 		if (this.hard_timeout)
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 06/10] net: flow: Add flow removed notification
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

The purpose of this change is to provide an optional notification mechanism
for flow removal. It is intended to be used in conjunction with proposals
to optionally allow expiry of flows due to timeouts or resource contention.

The flow removed message is designed to have two forms:

1. A summary form where NET_FLOW_REMOVED_FLOW_COUNT is present,
   indicating the number of flows that were removed, and
   NET_FLOW_REMOVED_FLOWS_FLOWS is absent. In this form no details are
   provided about the removed flows beyond which table they were removed
   from and the reason for removal.

   The intention is to provide a lightweight mechanism that may be
   useful at times of resource contention.

2. A full form where NET_FLOW_REMOVED_FLOWS_FLOWS is present, including
   the flows that were removed, and NET_FLOW_REMOVED_FLOW_COUNT is absent.
   This form provides full details of the flows removed.

Inspired by flow removed notifications in OpenFlow.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only
---
 include/linux/if_flow.h      |  3 ++
 include/uapi/linux/if_flow.h | 74 ++++++++++++++++++++++++++++++++++-
 net/core/flow_table.c        | 92 ++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 164 insertions(+), 5 deletions(-)

diff --git a/include/linux/if_flow.h b/include/linux/if_flow.h
index 4af9794..351eb30 100644
--- a/include/linux/if_flow.h
+++ b/include/linux/if_flow.h
@@ -4,4 +4,7 @@
 #include <uapi/linux/if_flow.h>
 
 int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow);
+int net_flow_put_rem_flow_summary_msg(struct net_device *dev,
+				      struct net *net, u32 portid, int table,
+				      u32 reason, int n_flows, gfp_t flags);
 #endif
diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 96905fa..d1643f3 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -36,6 +36,7 @@
  *       [NET_FLOW_TABLE_ATTR_SOURCE]
  *       [NET_FLOW_TABLE_ATTR_SIZE]
  *       [NET_FLOW_TABLE_ATTR_FEATURES]
+ *       [NET_FLOW_TABLE_ATTR_FLOW_REM]
  *	 [NET_FLOW_TABLE_ATTR_MATCHES]
  *	   [NET_FLOW_FIELD_REF]
  *	   [NET_FLOW_FIELD_REF]
@@ -159,6 +160,7 @@
  *     [NET_FLOW_ATTR_BYTE_COUNT]
  *     [NET_FLOW_ATTR_PACKET_COUNT]
  *     [NET_FLOW_ATTR_LAST_USED]
+ *     [NET_FLOW_ATTR_FLOW_REM]
  *     [NET_FLOW_MATCHES]
  *       [NET_FLOW_FIELD_REF]
  *       [NET_FLOW_FIELD_REF]
@@ -196,6 +198,39 @@
  * [NET_FLOW_NOTIFICATION]
  *   [NET_FLOW_NOTIFICATION_ATTR_TYPE]
  *   [NET_FLOW_NOTIFICATION_ATTR_PIDS]
+ *
+ * Flow Removed Notification <Kernel-to-user notification>
+ *
+ * [NET_FLOW_REM_FLOW]
+ *   [NET_FLOW_REM_FLOW_TABLE]
+ *   [NET_FLOW_REM_FLOW_REASON]
+ *   [NET_FLOW_REM_FLOW_COUNT]
+ *   [NET_FLOW_REM_FLOWS_FLOWS]
+ *      [NET_FLOW_FLOW]
+ *           [NET_FLOW_ATTR_TABLE]
+ *	     [NET_FLOW_ATTR_UID]
+ *	     [NET_FLOW_ATTR_PRIORITY]
+ *	     [NET_FLOW_ATTR_IDLE_TIMEOUT]
+ *	     [NET_FLOW_ATTR_HARD_TIMEOUT]
+ *	     [NET_FLOW_ATTR_BYTE_COUNT]
+ *	     [NET_FLOW_ATTR_PACKET_COUNT]
+ *	     [NET_FLOW_ATTR_USED]
+ *	     [NET_FLOW_ATTR_FLOW_REM]
+ *	     [NET_FLOW_ATTR_MATCHES]
+ *	        [NET_FLOW_FIELD_REF]
+ *	        [NET_FLOW_FIELD_REF]
+ *	        [...]
+ *	     [NET_FLOW_ATTR_ACTIONS]
+ *	        [NET_FLOW_ACTION]
+ *	          [NET_FLOW_ACTION_ATTR_UID]
+ *	          [NET_FLOW_ACTION_ATTR_SIGNATURE]
+ *		     [NET_FLOW_ACTION_ARG]
+ *	             [...]
+ *	        [NET_FLOW_ACTION]
+ *	          [..]
+ *	        [...]
+ *    [NET_FLOW_FLOW]
+ *	    [...]
  */
 
 #ifndef _UAPI_LINUX_IF_FLOW
@@ -397,7 +432,9 @@ enum {
  *               Zero for no timeout.
  * @byte_count bytes recieved
  * @byte_count packets recieved
- * @last_used time of most recent use (msec since system initialisation)
+ * @used time of most recent use (msec)
+ * @flow_rem flow notifications to send.
+ *           Bitmap of NET_FLOW_REM_F_*
  *
  * Flows must match all entries in match set.
  */
@@ -407,6 +444,7 @@ struct net_flow_flow {
 	int priority;
 	__u32 idle_timeout;
 	__u32 hard_timeout;
+	__u32 flow_rem;
 	__u64 byte_count;
 	__u64 packet_count;
 	__u64 last_used;
@@ -453,6 +491,7 @@ enum {
 	NET_FLOW_ATTR_BYTE_COUNT,
 	NET_FLOW_ATTR_PACKET_COUNT,
 	NET_FLOW_ATTR_LAST_USED,
+	NET_FLOW_ATTR_FLOW_REM,
 	__NET_FLOW_ATTR_MAX,
 };
 #define NET_FLOW_ATTR_MAX (__NET_FLOW_ATTR_MAX - 1)
@@ -464,6 +503,9 @@ enum {
  * @uid unique identifier for table
  * @source uid of parent table
  * @size max number of entries for table or -1 for unbounded
+ * @features Features supported by table. Bitmap of NET_FLOW_TABLE_F_*
+ * @flow_rem Flow removal notifications supported by table.
+             Bitmap of NET_FLOW_REM_F_*
  * @matches null terminated set of supported match types given by match uid
  * @actions null terminated set of supported action types given by action uid
  * @flows set of flows
@@ -474,6 +516,7 @@ struct net_flow_table {
 	int source;
 	int size;
 	__u32 features;
+	__u32 flow_rem;
 	struct net_flow_field_ref *matches;
 	net_flow_action_ref *actions;
 };
@@ -494,6 +537,7 @@ enum {
 	NET_FLOW_TABLE_ATTR_MATCHES,
 	NET_FLOW_TABLE_ATTR_ACTIONS,
 	NET_FLOW_TABLE_ATTR_FEATURES,
+	NET_FLOW_TABLE_ATTR_FLOW_REM,
 	__NET_FLOW_TABLE_ATTR_MAX,
 };
 #define NET_FLOW_TABLE_ATTR_MAX (__NET_FLOW_TABLE_ATTR_MAX - 1)
@@ -669,6 +713,29 @@ enum {
 #define NET_FLOW_NOTIFICATION_ATTR_MAX (__NET_FLOW_NOTIFICATION_ATTR_MAX - 1)
 
 enum {
+	NET_FLOW_REM_FLOW_UNSPEC,
+	NET_FLOW_REM_FLOW_TABLE,
+	NET_FLOW_REM_FLOW_REASON,
+	NET_FLOW_REM_FLOW_COUNT,
+	NET_FLOW_REM_FLOW_FLOWS,
+
+	__NET_FLOW_REM_FLOW_MAX,
+	NET_FLOW_REM_FLOW_MAX = (__NET_FLOW_REM_FLOW_MAX - 1),
+};
+
+enum net_flow_rem_reason {
+	NET_FLOW_REM_FLOW_REASON_IDLE_TIMEOUT,	/* Idle timeout */
+	NET_FLOW_REM_FLOW_REASON_HARD_TIMEOUT,	/* Hard timeout */
+	NET_FLOW_REM_FLOW_REASON_DELETE,	/* Deleted (by NET_FLOW_TABLE_CMD_DEL_FLOWS) */
+};
+
+enum {
+	NET_FLOW_REM_F_IDLE_TIMEOUT	= (1 << NET_FLOW_REM_FLOW_REASON_IDLE_TIMEOUT),
+	NET_FLOW_REM_F_HARD_TIMEOUT	= (1 << NET_FLOW_REM_FLOW_REASON_HARD_TIMEOUT),
+	NET_FLOW_REM_F_DELETE		= (1 << NET_FLOW_REM_FLOW_REASON_DELETE),
+};
+
+enum {
 	NET_FLOW_UNSPEC,
 	NET_FLOW_IDENTIFIER_TYPE,
 	NET_FLOW_IDENTIFIER,
@@ -681,12 +748,14 @@ enum {
 	NET_FLOW_FLOWS,
 	NET_FLOW_FLOWS_ERROR,
 	NET_FLOW_NOTIFICATION,
+	NET_FLOW_REM_FLOW,
 
 	__NET_FLOW_MAX,
 	NET_FLOW_MAX = (__NET_FLOW_MAX - 1),
 };
 
 enum {
+	/* Userspace commands. */
 	NET_FLOW_TABLE_CMD_GET_TABLES,
 	NET_FLOW_TABLE_CMD_GET_HEADERS,
 	NET_FLOW_TABLE_CMD_GET_ACTIONS,
@@ -704,6 +773,9 @@ enum {
 	NET_FLOW_TABLE_CMD_SET_NOTIFICATION,
 	NET_FLOW_TABLE_CMD_GET_NOTIFICATION,
 
+	/* Kernel-to-user notifications. */
+	NET_FLOW_TABLE_CMD_REM_FLOW,
+
 	__NET_FLOW_CMD_MAX,
 	NET_FLOW_CMD_MAX = (__NET_FLOW_CMD_MAX - 1),
 };
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index e8047eb..10b113f 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -58,6 +58,7 @@ struct nla_policy net_flow_flow_policy[NET_FLOW_ATTR_MAX + 1] = {
 	[NET_FLOW_ATTR_BYTE_COUNT]	= { .type = NLA_U64 },
 	[NET_FLOW_ATTR_PACKET_COUNT]	= { .type = NLA_U64 },
 	[NET_FLOW_ATTR_LAST_USED]	= { .type = NLA_U64 },
+	[NET_FLOW_ATTR_FLOW_REM]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_MATCHES]	= { .type = NLA_NESTED },
 	[NET_FLOW_ATTR_ACTIONS]	= { .type = NLA_NESTED },
 };
@@ -70,6 +71,7 @@ struct nla_policy net_flow_table_policy[NET_FLOW_TABLE_ATTR_MAX + 1] = {
 	[NET_FLOW_TABLE_ATTR_SOURCE]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_SIZE]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_FEATURES]	= { .type = NLA_U32 },
+	[NET_FLOW_TABLE_ATTR_FLOW_REM]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_MATCHES]	= { .type = NLA_NESTED },
 	[NET_FLOW_TABLE_ATTR_ACTIONS]	= { .type = NLA_NESTED },
 };
@@ -190,7 +192,8 @@ int net_flow_put_flow_action(struct sk_buff *skb, struct net_flow_action *a)
 
 int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow)
 {
-	struct nlattr *flows, *matches;
+	struct nlattr *flows;
+	struct nlattr *matches = NULL; /* must be null to unwind */
 	struct nlattr *actions = NULL; /* must be null to unwind */
 	int err, j, i = 0;
 
@@ -220,6 +223,10 @@ int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow)
 	    nla_put_u32(skb, NET_FLOW_ATTR_LAST_USED, flow->last_used))
 		goto flows_put_failure;
 
+	if (flow->flow_rem &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_FLOW_REM,
+			flow->flow_rem))
+
 	matches = nla_nest_start(skb, NET_FLOW_ATTR_MATCHES);
 	if (!matches)
 		goto flows_put_failure;
@@ -273,6 +280,10 @@ static int net_flow_put_table(struct net_device *dev,
 	    nla_put_u32(skb, NET_FLOW_TABLE_ATTR_FEATURES, t->features))
 		return -EMSGSIZE;
 
+	if (t->flow_rem &&
+	    nla_put_u32(skb, NET_FLOW_TABLE_ATTR_FLOW_REM, t->flow_rem))
+		return -EMSGSIZE;
+
 	matches = nla_nest_start(skb, NET_FLOW_TABLE_ATTR_MATCHES);
 	if (!matches)
 		return -EMSGSIZE;
@@ -556,6 +567,8 @@ static int net_flow_get_flow(struct net_flow_flow *flow, struct nlattr *attr)
 		flow->packet_count = nla_get_u64(f[NET_FLOW_ATTR_PACKET_COUNT]);
 	if (f[NET_FLOW_ATTR_LAST_USED])
 		flow->last_used = nla_get_u64(f[NET_FLOW_ATTR_LAST_USED]);
+	if (f[NET_FLOW_ATTR_FLOW_REM])
+		flow->flow_rem = nla_get_u32(f[NET_FLOW_ATTR_FLOW_REM]);
 
 	flow->matches = NULL;
 	flow->actions = NULL;
@@ -654,6 +667,9 @@ static int net_flow_get_table(struct net_flow_table *table, struct nlattr *nla)
 	table->features = tbl[NET_FLOW_TABLE_ATTR_FEATURES] ?
 		          nla_get_u32(tbl[NET_FLOW_TABLE_ATTR_FEATURES]) : 0;
 
+	table->flow_rem = tbl[NET_FLOW_TABLE_ATTR_FLOW_REM] ?
+		          nla_get_u32(tbl[NET_FLOW_TABLE_ATTR_FLOW_REM]) : 0;
+
 	if (tbl[NET_FLOW_TABLE_ATTR_MATCHES]) {
 		cnt = 0;
 		nla_for_each_nested(i, tbl[NET_FLOW_TABLE_ATTR_MATCHES], rem)
@@ -796,11 +812,13 @@ static struct net_flow_table *net_flow_table_get_table(struct net_device *dev,
 }
 
 static int net_flow_table_check_features(struct net_device *dev,
-					 int table_uid, u32 used_features)
+					 int table_uid, u32 used_features,
+					 u32 used_flow_rem)
 {
 	struct net_flow_table *table;
 
-	if (!used_features) /* No features: no problems */
+	/* No features, no flags: no problems */
+	if (!used_features && !used_flow_rem)
 		return 0;
 
 	table = net_flow_table_get_table(dev, table_uid);
@@ -810,6 +828,9 @@ static int net_flow_table_check_features(struct net_device *dev,
 	if ((used_features & table->features) != used_features)
 		return -EINVAL;
 
+	if ((used_flow_rem & table->flow_rem) != used_flow_rem)
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -1415,7 +1436,8 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 			used_features |= NET_FLOW_TABLE_F_PACKET_LAST_USED;
 
 		err = net_flow_table_check_features(dev, this.table_id,
-						    used_features);
+						    used_features,
+						    this.flow_rem);
 		if (err)
 			break;
 
@@ -1629,6 +1651,68 @@ err:
 	return err;
 }
 
+/**
+ * net_flow_put_rem_flow_summary_msg
+ * @dev: network device of flow table from which flows that were removed
+ * @net: network namespce to use when sending message
+ * @portid: netlink portid of the destination socket
+ * @table: table that flows were removed from
+ * @reason: reason for removal
+ * @n_flows: number of flows rem
+ * @flags: the type of memory to allocate
+ */
+int net_flow_put_rem_flow_summary_msg(struct net_device *dev,
+				      struct net *net, u32 portid, int table,
+				      u32 reason, int n_flows, gfp_t flags)
+{
+	int err = -ENOBUFS;
+	struct genl_info info = {
+		.dst_sk = net->genl_sock,
+		.snd_portid = portid,
+	};
+	struct genlmsghdr *hdr;
+	struct nlattr *start;
+	struct sk_buff *skb;
+
+	skb = genlmsg_new_unicast(NLMSG_DEFAULT_SIZE, &info, flags);
+	if (!skb)
+		goto err;
+
+	hdr = genlmsg_put(skb, 0, 0, &net_flow_nl_family, 0,
+			  NET_FLOW_TABLE_CMD_REM_FLOW);
+	if (!hdr)
+		goto err;
+
+	if (nla_put_u32(skb, NET_FLOW_IDENTIFIER_TYPE, NET_FLOW_IDENTIFIER_IFINDEX) ||
+	    nla_put_u32(skb, NET_FLOW_IDENTIFIER, dev->ifindex))
+		goto err;
+
+	start = nla_nest_start(skb, NET_FLOW_REM_FLOW);
+	if (!start) {
+		err = -EMSGSIZE;
+		goto err;
+	}
+
+	if (nla_put_u32(skb, NET_FLOW_REM_FLOW_TABLE, table) ||
+	    nla_put_u32(skb, NET_FLOW_REM_FLOW_REASON, reason) ||
+	    nla_put_u32(skb, NET_FLOW_REM_FLOW_COUNT, n_flows))
+		goto err;
+
+	nla_nest_end(skb, start);
+
+	err = genlmsg_end(skb, hdr);
+	if (err < 0)
+		goto err;
+
+	err = genlmsg_unicast(net, skb, portid);
+	skb = NULL;
+
+err:
+	kfree_skb(skb);
+	return err;
+}
+EXPORT_SYMBOL(net_flow_put_rem_flow_summary_msg);
+
 static const struct genl_ops net_flow_table_nl_ops[] = {
 	{
 		.cmd = NET_FLOW_TABLE_CMD_GET_TABLES,
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 05/10] net: flow: Add get, set and del notifier commands
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

The purpose of these new commands is to manage the registration
of listeners for flow deletion notifications which will be
proposed in a subsequent patch.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only
---
 include/linux/netdevice.h    |   6 ++
 include/uapi/linux/if_flow.h |  41 +++++++++++
 net/core/flow_table.c        | 161 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 208 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2defaae..1174ab7 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1165,6 +1165,12 @@ struct net_device_ops {
 						      struct net_flow_flow *f);
 	int  (*ndo_flow_create_table)(struct net_device *dev,
 				      struct net_flow_table *t);
+	int			(*ndo_flow_set_notification)(struct net_device *dev,
+							     u32 type, const u32 *pids,
+							     size_t n_pids);
+	int			(*ndo_flow_get_notification)(struct net_device *dev,
+							     u32 type, u32 **pids,
+							     size_t *n_pids);
 };
 
 /**
diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 18214ea..96905fa 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -172,6 +172,30 @@
  *	 [NET_FLOW_ACTION]
  *	         [..]
  *	       [...]
+ *
+ * Set Flow Notification <Request>,
+ * Get Flow Notification <Request> and
+ * Get Flow Notification <Reply> description.
+ *
+ * Set Flow Notification registers netlink port ids to receive flow
+ * deletion notifications if the NET_FLOW_NOTIFICATION_PIDS attribute is
+ * present. Otherwise it unregisters port ids if they were previously
+ * registered by a Set Flow Notification with the
+ * NET_FLOW_NOTIFICATION_PIDS attribute present.
+ *
+ * Get Flow Notification reports the port ids if they were previously
+ * registered by a Set Flow Notification with the
+ * NET_FLOW_NOTIFICATION_PIDS.  If no ids are registered then the
+ * NET_FLOW_NOTIFICATION_PIDS attribute of the reply should be omitted.
+ *
+ * The NET_FLOW_NOTIFICATION_ATTR_PIDS attribute is an array of u32 values.
+ * If the attribute is present then it must contain at least one element.
+ * The implementation may choose to ignore some elements.  Currently the
+ * implementation ignores all elements other than the first one.
+ *
+ * [NET_FLOW_NOTIFICATION]
+ *   [NET_FLOW_NOTIFICATION_ATTR_TYPE]
+ *   [NET_FLOW_NOTIFICATION_ATTR_PIDS]
  */
 
 #ifndef _UAPI_LINUX_IF_FLOW
@@ -633,6 +657,18 @@ enum {
 };
 
 enum {
+	NET_FLOW_NOTIFICATION_TYPE_FLOW_REM,
+};
+
+enum {
+	NET_FLOW_NOTIFICATION_ATTR_UNSPEC,
+	NET_FLOW_NOTIFICATION_ATTR_TYPE,
+	NET_FLOW_NOTIFICATION_ATTR_PIDS,
+	__NET_FLOW_NOTIFICATION_ATTR_MAX,
+};
+#define NET_FLOW_NOTIFICATION_ATTR_MAX (__NET_FLOW_NOTIFICATION_ATTR_MAX - 1)
+
+enum {
 	NET_FLOW_UNSPEC,
 	NET_FLOW_IDENTIFIER_TYPE,
 	NET_FLOW_IDENTIFIER,
@@ -644,6 +680,7 @@ enum {
 	NET_FLOW_TABLE_GRAPH,
 	NET_FLOW_FLOWS,
 	NET_FLOW_FLOWS_ERROR,
+	NET_FLOW_NOTIFICATION,
 
 	__NET_FLOW_MAX,
 	NET_FLOW_MAX = (__NET_FLOW_MAX - 1),
@@ -663,6 +700,10 @@ enum {
 
 	NET_FLOW_TABLE_CMD_CREATE_TABLE,
 	NET_FLOW_TABLE_CMD_DESTROY_TABLE,
+
+	NET_FLOW_TABLE_CMD_SET_NOTIFICATION,
+	NET_FLOW_TABLE_CMD_GET_NOTIFICATION,
+
 	__NET_FLOW_CMD_MAX,
 	NET_FLOW_CMD_MAX = (__NET_FLOW_CMD_MAX - 1),
 };
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index 070e646..e8047eb 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -21,6 +21,7 @@
 #include <uapi/linux/if_flow.h>
 #include <linux/if_bridge.h>
 #include <linux/types.h>
+#include <net/sock.h>
 #include <net/netlink.h>
 #include <net/genetlink.h>
 #include <net/rtnetlink.h>
@@ -1478,6 +1479,156 @@ out:
 	return -EINVAL;
 }
 
+static const
+struct nla_policy net_flow_notification_policy[NET_FLOW_NOTIFICATION_ATTR_MAX + 1] = {
+	[NET_FLOW_NOTIFICATION_ATTR_TYPE] = { .type = NLA_U32,},
+	[NET_FLOW_NOTIFICATION_ATTR_PIDS] = { .type = NLA_U32,},
+};
+
+static int net_flow_table_cmd_set_notification(struct sk_buff *skb,
+					       struct genl_info *info)
+{
+	int err;
+	struct net_device *dev;
+	struct nlattr *tb[NET_FLOW_NOTIFICATION_ATTR_MAX+1];
+	u32 type;
+
+	dev = net_flow_table_get_dev(info);
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->netdev_ops->ndo_flow_set_notification) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (!info->attrs[NET_FLOW_NOTIFICATION]) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = nla_parse_nested(tb, NET_FLOW_NOTIFICATION_ATTR_MAX,
+			       info->attrs[NET_FLOW_NOTIFICATION],
+			       net_flow_notification_policy);
+	if (err)
+		goto out;
+
+	if (!tb[NET_FLOW_NOTIFICATION_ATTR_TYPE]) {
+		err = -EINVAL;
+		goto out;
+	}
+	type = nla_get_u32(tb[NET_FLOW_NOTIFICATION_ATTR_TYPE]);
+	if (type != NET_FLOW_NOTIFICATION_TYPE_FLOW_REM) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (tb[NET_FLOW_NOTIFICATION_ATTR_PIDS]) {
+		u32 pid;
+
+		/* Only the first pid is used at this time */
+		pid = nla_get_u32(tb[NET_FLOW_NOTIFICATION_ATTR_PIDS]);
+
+		err = dev->netdev_ops->ndo_flow_set_notification(dev, type,
+								 &pid, 1);
+	} else {
+		err = dev->netdev_ops->ndo_flow_set_notification(dev, type,
+								 NULL, 0);
+	}
+
+out:
+	dev_put(dev);
+	return err;
+}
+
+static int net_flow_table_cmd_get_notification(struct sk_buff *skb,
+					       struct genl_info *info)
+{
+	int err;
+	size_t n_pids;
+	struct genlmsghdr *hdr;
+	struct net_device *dev;
+	struct nlattr *start;
+	struct nlattr *tb[NET_FLOW_NOTIFICATION_ATTR_MAX+1];
+	struct sk_buff *msg = NULL;
+	u32 *pids, type;
+
+	dev = net_flow_table_get_dev(info);
+	if (!dev)
+		return -EINVAL;
+
+	if (!dev->netdev_ops->ndo_flow_get_notification) {
+		err = -EOPNOTSUPP;
+		goto err;
+	}
+
+	if (!info->attrs[NET_FLOW_NOTIFICATION]) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	err = nla_parse_nested(tb, NET_FLOW_NOTIFICATION_ATTR_MAX,
+			       info->attrs[NET_FLOW_NOTIFICATION],
+			       net_flow_notification_policy);
+	if (err)
+		goto err;
+
+	if (!tb[NET_FLOW_NOTIFICATION_ATTR_TYPE]) {
+		err = -EINVAL;
+		goto err;
+	}
+	type = nla_get_u32(tb[NET_FLOW_NOTIFICATION_ATTR_TYPE]);
+
+	err = dev->netdev_ops->ndo_flow_get_notification(dev, type, &pids,
+							 &n_pids);
+	if (err)
+		goto err;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg) {
+		err = -ENOBUFS;
+		goto err;
+	}
+
+	hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+			  &net_flow_nl_family, 0,
+			  NET_FLOW_TABLE_CMD_GET_NOTIFICATION);
+	if (!hdr) {
+		err = -ENOBUFS;
+		goto err;
+	}
+
+	if (nla_put_u32(msg, NET_FLOW_IDENTIFIER_TYPE, NET_FLOW_IDENTIFIER_IFINDEX) ||
+	    nla_put_u32(msg, NET_FLOW_IDENTIFIER, dev->ifindex)) {
+		err = -ENOBUFS;
+		goto err;
+	}
+
+	start = nla_nest_start(msg, NET_FLOW_NOTIFICATION);
+	if (!start)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(msg, NET_FLOW_NOTIFICATION_ATTR_TYPE, type) ||
+	    (n_pids > 0 &&
+	     nla_put_u32(msg, NET_FLOW_NOTIFICATION_ATTR_PIDS, pids[0])))
+		return -ENOBUFS;
+
+	nla_nest_end(msg, start);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0)
+		goto err;
+
+	dev_put(dev);
+
+	return genlmsg_reply(msg, info);
+
+err:
+	nlmsg_free(msg);
+	dev_put(dev);
+	return err;
+}
+
 static const struct genl_ops net_flow_table_nl_ops[] = {
 	{
 		.cmd = NET_FLOW_TABLE_CMD_GET_TABLES,
@@ -1541,6 +1692,16 @@ static const struct genl_ops net_flow_table_nl_ops[] = {
 		.doit = net_flow_table_cmd_destroy_tables,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NET_FLOW_TABLE_CMD_SET_NOTIFICATION,
+		.doit = net_flow_table_cmd_set_notification,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = NET_FLOW_TABLE_CMD_GET_NOTIFICATION,
+		.doit = net_flow_table_cmd_get_notification,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static int __init net_flow_nl_module_init(void)
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 04/10] net: flow: Add counters to flows
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

It may be useful for hardware flow table support for counters to be exposed
via the flow API. One possible use case of this is for Open vSwitch to use
the flow API in conjunction with its existing datapath flow management
scheme which in a nutshell treats the datapath as a cache that times out
idle entries.

This patch exposes optionally exposes three counters:
- Number of packets that have matched a flow
- Number of bytes of packets that have matched a flow
- The time in ms when the flow was last hit

Inspired by the flow counters present in Open Flow.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only
---
 include/uapi/linux/if_flow.h | 24 ++++++++++++++++++++++++
 net/core/flow_table.c        | 27 +++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 28da45b..18214ea 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -127,6 +127,9 @@
  *	     [NET_FLOW_ATTR_PRIORITY]
  *	     [NET_FLOW_ATTR_IDLE_TIMEOUT]
  *	     [NET_FLOW_ATTR_HARD_TIMEOUT]
+ *	     [NET_FLOW_ATTR_BYTE_COUNT]
+ *	     [NET_FLOW_ATTR_PACKET_COUNT]
+ *	     [NET_FLOW_ATTR_LAST_USED]
  *	     [NET_FLOW_ATTR_MATCHES]
  *	        [NET_FLOW_FIELD_REF]
  *	        [NET_FLOW_FIELD_REF]
@@ -153,6 +156,9 @@
  *     [NET_FLOW_ATTR_PRIORITY]
  *     [NET_FLOW_ATTR_IDLE_TIMEOUT]
  *     [NET_FLOW_ATTR_HARD_TIMEOUT]
+ *     [NET_FLOW_ATTR_BYTE_COUNT]
+ *     [NET_FLOW_ATTR_PACKET_COUNT]
+ *     [NET_FLOW_ATTR_LAST_USED]
  *     [NET_FLOW_MATCHES]
  *       [NET_FLOW_FIELD_REF]
  *       [NET_FLOW_FIELD_REF]
@@ -365,6 +371,9 @@ enum {
  * @idle_timeout idle timeout of flow in seconds. Zero for no timeout.
  * @hard_timeout timeout of flow regardless of use in seconds.
  *               Zero for no timeout.
+ * @byte_count bytes recieved
+ * @byte_count packets recieved
+ * @last_used time of most recent use (msec since system initialisation)
  *
  * Flows must match all entries in match set.
  */
@@ -374,6 +383,9 @@ struct net_flow_flow {
 	int priority;
 	__u32 idle_timeout;
 	__u32 hard_timeout;
+	__u64 byte_count;
+	__u64 packet_count;
+	__u64 last_used;
 	struct net_flow_field_ref *matches;
 	struct net_flow_action *actions;
 };
@@ -414,6 +426,9 @@ enum {
 	NET_FLOW_ATTR_ACTIONS,
 	NET_FLOW_ATTR_IDLE_TIMEOUT,
 	NET_FLOW_ATTR_HARD_TIMEOUT,
+	NET_FLOW_ATTR_BYTE_COUNT,
+	NET_FLOW_ATTR_PACKET_COUNT,
+	NET_FLOW_ATTR_LAST_USED,
 	__NET_FLOW_ATTR_MAX,
 };
 #define NET_FLOW_ATTR_MAX (__NET_FLOW_ATTR_MAX - 1)
@@ -465,6 +480,15 @@ enum {
 
 	/* Table supports idle timeout of flows */
 	NET_FLOW_TABLE_F_HARD_TIMEOUT		= (1 << 1),
+
+	/* Table supports byte counter for flows */
+	NET_FLOW_TABLE_F_BYTE_COUNT		= (1 << 2),
+
+	/* Table supports packet counter for flows */
+	NET_FLOW_TABLE_F_PACKET_COUNT		= (1 << 3),
+
+	/* Table supports last used counter for flows */
+	NET_FLOW_TABLE_F_PACKET_LAST_USED	= (1 << 4),
 };
 
 #if 0
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index 89ba9bc..070e646 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -54,6 +54,9 @@ struct nla_policy net_flow_flow_policy[NET_FLOW_ATTR_MAX + 1] = {
 	[NET_FLOW_ATTR_PRIORITY]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_IDLE_TIMEOUT]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_HARD_TIMEOUT]	= { .type = NLA_U32 },
+	[NET_FLOW_ATTR_BYTE_COUNT]	= { .type = NLA_U64 },
+	[NET_FLOW_ATTR_PACKET_COUNT]	= { .type = NLA_U64 },
+	[NET_FLOW_ATTR_LAST_USED]	= { .type = NLA_U64 },
 	[NET_FLOW_ATTR_MATCHES]	= { .type = NLA_NESTED },
 	[NET_FLOW_ATTR_ACTIONS]	= { .type = NLA_NESTED },
 };
@@ -206,6 +209,16 @@ int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow)
 	    nla_put_u32(skb, NET_FLOW_ATTR_HARD_TIMEOUT, flow->hard_timeout))
 		goto flows_put_failure;
 
+	if (flow->byte_count &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_BYTE_COUNT, flow->byte_count))
+		goto flows_put_failure;
+	if (flow->packet_count &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_PACKET_COUNT, flow->packet_count))
+		goto flows_put_failure;
+	if (flow->last_used &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_LAST_USED, flow->last_used))
+		goto flows_put_failure;
+
 	matches = nla_nest_start(skb, NET_FLOW_ATTR_MATCHES);
 	if (!matches)
 		goto flows_put_failure;
@@ -536,6 +549,13 @@ static int net_flow_get_flow(struct net_flow_flow *flow, struct nlattr *attr)
 	if (f[NET_FLOW_ATTR_HARD_TIMEOUT])
 		flow->hard_timeout = nla_get_u32(f[NET_FLOW_ATTR_HARD_TIMEOUT]);
 
+	if (f[NET_FLOW_ATTR_BYTE_COUNT])
+		flow->byte_count = nla_get_u64(f[NET_FLOW_ATTR_BYTE_COUNT]);
+	if (f[NET_FLOW_ATTR_PACKET_COUNT])
+		flow->packet_count = nla_get_u64(f[NET_FLOW_ATTR_PACKET_COUNT]);
+	if (f[NET_FLOW_ATTR_LAST_USED])
+		flow->last_used = nla_get_u64(f[NET_FLOW_ATTR_LAST_USED]);
+
 	flow->matches = NULL;
 	flow->actions = NULL;
 
@@ -1386,6 +1406,13 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 		if (this.hard_timeout)
 			used_features |= NET_FLOW_TABLE_F_HARD_TIMEOUT;
 
+		if (this.byte_count)
+			used_features |= NET_FLOW_TABLE_F_BYTE_COUNT;
+		if (this.packet_count)
+			used_features |= NET_FLOW_TABLE_F_PACKET_COUNT;
+		if (this.last_used)
+			used_features |= NET_FLOW_TABLE_F_PACKET_LAST_USED;
+
 		err = net_flow_table_check_features(dev, this.table_id,
 						    used_features);
 		if (err)
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 03/10] net: flow: Add timeouts to flows
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

It may be useful for hardware flow table support for timeouts to be exposed
via the flow API. One possible use case of this is for Open vSwitch to use
the flow API in conjunction with its existing datapath flow management
scheme which in a nutshell treats the datapath as a cache that times out
idle entries.

Inspired by the timeouts present in OpenFlow.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only

Note to John Fastabend: This patch adds u32 fields to struct net_flow_flow.
This is in contrast to existing int fields of that structure. It is unclear
to me which is best and in practice (2^31-1)s seems to be more than ample
for a timeout.
---
 include/uapi/linux/if_flow.h | 19 +++++++++++++++++++
 net/core/flow_table.c        | 21 ++++++++++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 5720698..28da45b 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -125,6 +125,8 @@
  *           [NET_FLOW_ATTR_TABLE]
  *	     [NET_FLOW_ATTR_UID]
  *	     [NET_FLOW_ATTR_PRIORITY]
+ *	     [NET_FLOW_ATTR_IDLE_TIMEOUT]
+ *	     [NET_FLOW_ATTR_HARD_TIMEOUT]
  *	     [NET_FLOW_ATTR_MATCHES]
  *	        [NET_FLOW_FIELD_REF]
  *	        [NET_FLOW_FIELD_REF]
@@ -149,6 +151,8 @@
  *     [NET_FLOW_ATTR_TABLE]
  *     [NET_FLOW_ATTR_UID]
  *     [NET_FLOW_ATTR_PRIORITY]
+ *     [NET_FLOW_ATTR_IDLE_TIMEOUT]
+ *     [NET_FLOW_ATTR_HARD_TIMEOUT]
  *     [NET_FLOW_MATCHES]
  *       [NET_FLOW_FIELD_REF]
  *       [NET_FLOW_FIELD_REF]
@@ -358,6 +362,9 @@ enum {
  * @priority priority to execute flow match/action in table
  * @match null terminated set of match uids match criteria
  * @action null terminated set of action uids to apply to match
+ * @idle_timeout idle timeout of flow in seconds. Zero for no timeout.
+ * @hard_timeout timeout of flow regardless of use in seconds.
+ *               Zero for no timeout.
  *
  * Flows must match all entries in match set.
  */
@@ -365,6 +372,8 @@ struct net_flow_flow {
 	int table_id;
 	int uid;
 	int priority;
+	__u32 idle_timeout;
+	__u32 hard_timeout;
 	struct net_flow_field_ref *matches;
 	struct net_flow_action *actions;
 };
@@ -403,6 +412,8 @@ enum {
 	NET_FLOW_ATTR_PRIORITY,
 	NET_FLOW_ATTR_MATCHES,
 	NET_FLOW_ATTR_ACTIONS,
+	NET_FLOW_ATTR_IDLE_TIMEOUT,
+	NET_FLOW_ATTR_HARD_TIMEOUT,
 	__NET_FLOW_ATTR_MAX,
 };
 #define NET_FLOW_ATTR_MAX (__NET_FLOW_ATTR_MAX - 1)
@@ -448,6 +459,14 @@ enum {
 };
 #define NET_FLOW_TABLE_ATTR_MAX (__NET_FLOW_TABLE_ATTR_MAX - 1)
 
+enum {
+	/* Table supports idle timeout of flows */
+	NET_FLOW_TABLE_F_IDLE_TIMEOUT		= (1 << 0),
+
+	/* Table supports idle timeout of flows */
+	NET_FLOW_TABLE_F_HARD_TIMEOUT		= (1 << 1),
+};
+
 #if 0
 struct net_flow_offset {
 	int offset;
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index 1ea88ed..89ba9bc 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -52,6 +52,8 @@ struct nla_policy net_flow_flow_policy[NET_FLOW_ATTR_MAX + 1] = {
 	[NET_FLOW_ATTR_TABLE]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_UID]		= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_PRIORITY]	= { .type = NLA_U32 },
+	[NET_FLOW_ATTR_IDLE_TIMEOUT]	= { .type = NLA_U32 },
+	[NET_FLOW_ATTR_HARD_TIMEOUT]	= { .type = NLA_U32 },
 	[NET_FLOW_ATTR_MATCHES]	= { .type = NLA_NESTED },
 	[NET_FLOW_ATTR_ACTIONS]	= { .type = NLA_NESTED },
 };
@@ -197,6 +199,13 @@ int net_flow_put_flow(struct sk_buff *skb, struct net_flow_flow *flow)
 	    nla_put_u32(skb, NET_FLOW_ATTR_PRIORITY, flow->priority))
 		goto flows_put_failure;
 
+	if (flow->idle_timeout &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_IDLE_TIMEOUT, flow->idle_timeout))
+		goto flows_put_failure;
+	if (flow->hard_timeout &&
+	    nla_put_u32(skb, NET_FLOW_ATTR_HARD_TIMEOUT, flow->hard_timeout))
+		goto flows_put_failure;
+
 	matches = nla_nest_start(skb, NET_FLOW_ATTR_MATCHES);
 	if (!matches)
 		goto flows_put_failure;
@@ -522,6 +531,11 @@ static int net_flow_get_flow(struct net_flow_flow *flow, struct nlattr *attr)
 	flow->uid = nla_get_u32(f[NET_FLOW_ATTR_UID]);
 	flow->priority = nla_get_u32(f[NET_FLOW_ATTR_PRIORITY]);
 
+	if (f[NET_FLOW_ATTR_IDLE_TIMEOUT])
+		flow->idle_timeout = nla_get_u32(f[NET_FLOW_ATTR_IDLE_TIMEOUT]);
+	if (f[NET_FLOW_ATTR_HARD_TIMEOUT])
+		flow->hard_timeout = nla_get_u32(f[NET_FLOW_ATTR_HARD_TIMEOUT]);
+
 	flow->matches = NULL;
 	flow->actions = NULL;
 
@@ -1367,9 +1381,10 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 		if (err)
 			goto out;
 
-		/* Set used_features here for each table feature that is used.
-		 * (Currently no table features are defined)
-		 */
+		if (this.idle_timeout)
+			used_features |= NET_FLOW_TABLE_F_IDLE_TIMEOUT;
+		if (this.hard_timeout)
+			used_features |= NET_FLOW_TABLE_F_HARD_TIMEOUT;
 
 		err = net_flow_table_check_features(dev, this.table_id,
 						    used_features);
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 02/10] net: flow: Add features to tables
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

This is intended to allow flows to advertise optional features.
Its initial intended use case is to advertise support for flow timeouts
which will be proposed by a subsequent patch.

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only
---
 include/uapi/linux/if_flow.h |  3 ++
 net/core/flow_table.c        | 79 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 76 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 25a6b31..5720698 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -35,6 +35,7 @@
  *       [NET_FLOW_TABLE_ATTR_UID]
  *       [NET_FLOW_TABLE_ATTR_SOURCE]
  *       [NET_FLOW_TABLE_ATTR_SIZE]
+ *       [NET_FLOW_TABLE_ATTR_FEATURES]
  *	 [NET_FLOW_TABLE_ATTR_MATCHES]
  *	   [NET_FLOW_FIELD_REF]
  *	   [NET_FLOW_FIELD_REF]
@@ -422,6 +423,7 @@ struct net_flow_table {
 	int uid;
 	int source;
 	int size;
+	__u32 features;
 	struct net_flow_field_ref *matches;
 	net_flow_action_ref *actions;
 };
@@ -441,6 +443,7 @@ enum {
 	NET_FLOW_TABLE_ATTR_SIZE,
 	NET_FLOW_TABLE_ATTR_MATCHES,
 	NET_FLOW_TABLE_ATTR_ACTIONS,
+	NET_FLOW_TABLE_ATTR_FEATURES,
 	__NET_FLOW_TABLE_ATTR_MAX,
 };
 #define NET_FLOW_TABLE_ATTR_MAX (__NET_FLOW_TABLE_ATTR_MAX - 1)
diff --git a/net/core/flow_table.c b/net/core/flow_table.c
index 5937fb7..1ea88ed 100644
--- a/net/core/flow_table.c
+++ b/net/core/flow_table.c
@@ -63,6 +63,7 @@ struct nla_policy net_flow_table_policy[NET_FLOW_TABLE_ATTR_MAX + 1] = {
 	[NET_FLOW_TABLE_ATTR_UID]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_SOURCE]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_SIZE]	= { .type = NLA_U32 },
+	[NET_FLOW_TABLE_ATTR_FEATURES]	= { .type = NLA_U32 },
 	[NET_FLOW_TABLE_ATTR_MATCHES]	= { .type = NLA_NESTED },
 	[NET_FLOW_TABLE_ATTR_ACTIONS]	= { .type = NLA_NESTED },
 };
@@ -245,6 +246,10 @@ static int net_flow_put_table(struct net_device *dev,
 	    nla_put_u32(skb, NET_FLOW_TABLE_ATTR_SIZE, t->size))
 		return -EMSGSIZE;
 
+	if (t->features &&
+	    nla_put_u32(skb, NET_FLOW_TABLE_ATTR_FEATURES, t->features))
+		return -EMSGSIZE;
+
 	matches = nla_nest_start(skb, NET_FLOW_TABLE_ATTR_MATCHES);
 	if (!matches)
 		return -EMSGSIZE;
@@ -611,6 +616,9 @@ static int net_flow_get_table(struct net_flow_table *table, struct nlattr *nla)
 	table->size = tbl[NET_FLOW_TABLE_ATTR_SIZE] ?
 		      nla_get_u32(tbl[NET_FLOW_TABLE_ATTR_SIZE]) : 0;
 
+	table->features = tbl[NET_FLOW_TABLE_ATTR_FEATURES] ?
+		          nla_get_u32(tbl[NET_FLOW_TABLE_ATTR_FEATURES]) : 0;
+
 	if (tbl[NET_FLOW_TABLE_ATTR_MATCHES]) {
 		cnt = 0;
 		nla_for_each_nested(i, tbl[NET_FLOW_TABLE_ATTR_MATCHES], rem)
@@ -719,6 +727,57 @@ static int net_flow_table_cmd_destroy_tables(struct sk_buff *skb,
 	return -EOPNOTSUPP;
 }
 
+static struct net_flow_table **
+net_flow_get_tables(struct net_device *dev)
+{
+	struct net_flow_table **tables;
+
+	if (!dev->netdev_ops->ndo_flow_get_tables)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	tables = dev->netdev_ops->ndo_flow_get_tables(dev);
+	if (!tables) /* transient failure should always have some table */
+		return ERR_PTR(-EBUSY);
+
+	return tables;
+}
+
+static struct net_flow_table *net_flow_table_get_table(struct net_device *dev,
+						       int table_uid)
+{
+	struct net_flow_table **tables;
+	int i;
+
+	tables = net_flow_get_tables(dev);
+	if (IS_ERR(tables))
+		return ERR_PTR(PTR_ERR(tables));
+
+	for (i = 0; tables[i]->uid; i++) {
+		if (tables[i]->uid == table_uid)
+			return tables[i];
+	}
+
+	return ERR_PTR(-ENOENT);
+}
+
+static int net_flow_table_check_features(struct net_device *dev,
+					 int table_uid, u32 used_features)
+{
+	struct net_flow_table *table;
+
+	if (!used_features) /* No features: no problems */
+		return 0;
+
+	table = net_flow_table_get_table(dev, table_uid);
+	if (IS_ERR(table))
+		return PTR_ERR(table);
+
+	if ((used_features & table->features) != used_features)
+		return -EINVAL;
+
+	return 0;
+}
+
 static int net_flow_table_cmd_get_tables(struct sk_buff *skb,
 					 struct genl_info *info)
 {
@@ -730,15 +789,12 @@ static int net_flow_table_cmd_get_tables(struct sk_buff *skb,
 	if (!dev)
 		return -EINVAL;
 
-	if (!dev->netdev_ops->ndo_flow_get_tables) {
+	tables = net_flow_get_tables(dev);
+	if (IS_ERR(tables)) {
 		dev_put(dev);
-		return -EOPNOTSUPP;
+		return PTR_ERR(tables);
 	}
 
-	tables = dev->netdev_ops->ndo_flow_get_tables(dev);
-	if (!tables) /* transient failure should always have some table */
-		return -EBUSY;
-
 	msg = net_flow_build_tables_msg(tables, dev,
 					info->snd_portid,
 					info->snd_seq,
@@ -1302,6 +1358,8 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 		err_handle = nla_get_u32(info->attrs[NET_FLOW_FLOWS_ERROR]);
 
 	nla_for_each_nested(flow, info->attrs[NET_FLOW_FLOWS], rem) {
+		u32 used_features = 0;
+
 		if (nla_type(flow) != NET_FLOW_FLOW)
 			continue;
 
@@ -1309,6 +1367,15 @@ static int net_flow_table_cmd_flows(struct sk_buff *recv_skb,
 		if (err)
 			goto out;
 
+		/* Set used_features here for each table feature that is used.
+		 * (Currently no table features are defined)
+		 */
+
+		err = net_flow_table_check_features(dev, this.table_id,
+						    used_features);
+		if (err)
+			break;
+
 		switch (cmd) {
 		case NET_FLOW_TABLE_CMD_SET_FLOWS:
 			err = dev->netdev_ops->ndo_flow_set_flows(dev, &this);
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 01/10] net: flow: Correct spelling of action
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman
In-Reply-To: <1419819340-19000-1-git-send-email-simon.horman@netronome.com>

Signed-off-by: Simon Horman <simon.horman@netronome.com>

---

Compile tested only

Previously submitted individually to John Fastabend and netdev ML.
---
 include/uapi/linux/if_flow.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/if_flow.h b/include/uapi/linux/if_flow.h
index 598c009..25a6b31 100644
--- a/include/uapi/linux/if_flow.h
+++ b/include/uapi/linux/if_flow.h
@@ -356,7 +356,7 @@ enum {
  * @uid unique identifier for flow
  * @priority priority to execute flow match/action in table
  * @match null terminated set of match uids match criteria
- * @actoin null terminated set of action uids to apply to match
+ * @action null terminated set of action uids to apply to match
  *
  * Flows must match all entries in match set.
  */
-- 
2.1.3

^ permalink raw reply related

* [PATCH/RFC flow-net-next 00/10] Flow Table API Cache Enhancements
From: Simon Horman @ 2014-12-29  2:15 UTC (permalink / raw)
  To: John Fastabend, netdev; +Cc: Simon Horman

Hi John, Hi All,

the purpose of this patch-set is to propose enhancements to the flow table
API which you have been working on.  The enhancements focus on optional
support for tables which act as caches, where a cache is defined to be a
table that may remove entries other than via an explicit remove-flow API
call.

I will be up-front and say that many of the ideas implemented here have
been inspired by OpenFlow: something that I am familiar with.

A possible user of flow timeouts and counters, is Open vSwitch: it does not
currently make use of the Flow Table API but it treats its datapath as a
typically manages the cached by removing flows based on statistics
indicating the idleness of flows.

That said, I am not attempting to make the Flow Table API OpenFlow or Open
vSwitch specific. I have attempted to make all the enhancements optional,
so that they may only be used in software and hardware scenarios where they
make sense.

Thus far I have only prototyped the netlink API side of these enhancements
and compile tested them.

This series is based on John Fastabend's flow-net-next tree
https://github.com/jrfastab/flow-net-next


Simon Horman (10):
  net: flow: Correct spelling of action
  net: flow: Add features to tables
  net: flow: Add timeouts to flows
  net: flow: Add counters to flows
  net: flow: Add get, set and del notifier commands
  net: flow: Add flow removed notification
  net: flow: Add importance to flows
  net: flow: Add get and set table config commands
  net: flow: Add eviction flags to table configuration
  net: flow: Add flow removed notification for eviction

 include/linux/if_flow.h      |   3 +
 include/linux/netdevice.h    |  10 +
 include/uapi/linux/if_flow.h | 232 +++++++++++++++++-
 net/core/flow_table.c        | 561 ++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 798 insertions(+), 8 deletions(-)

-- 
2.1.3

^ permalink raw reply

* [CPSW driver] broadcast ethernet pkg will be dropped?
From: Zheng Yi @ 2014-12-29  1:36 UTC (permalink / raw)
  To: David S. Miller, netdev; +Cc: Mugunthan V N, Lokesh Vutla

[-- Attachment #1: Type: text/plain, Size: 2293 bytes --]

Hi Sir

   I found your mail address from Linux kernel git repo. I think there
   is a problem in CPSW driver. If it is not your duty to maintain
   those code, could you please tell me the ones who are in charge of
   it?

   I found that when a AM3352 board boot, I use a laptop to send ARP
   request to it, the board do not reponse. Even ifconfig do not show
   that there is any pkg sent to the CPSW device. I'm sure that the
   ARP request PKG received by SoC(confirmed by my oscilloscope).

   After reading the SoC doc (Tech ref manual) and reading the driver code, i found that:
   15.3.2.7 Address Lookup Engine (ALE)
   "Broadcast packets will be dropped unless the broadcast address is entered into the table with the super bit set."

   and in driver: drivers/net/ether/ti/cpsw_ale.c

   int cpsw_ale_add_mcast(struct cpsw_ale *ale, u8 *addr, int port_mask,
                          int flags, u16 vid, int mcast_state)
   {
          /* ....... */

          cpsw_ale_set_super(ale_entry, (flags & ALE_BLOCKED) ? 1 : 0);

          /* ....... */
   }

   I have searched all of the calling of that function, and found no
   ALE_BLOCKED flag was set.  Especially, in
   cpsw_add_dual_emac_def_ale_entries(), only ALE_VLAN was provided
   for the broadcast address.

   After change the flag(brutally), my board can response to the ARP broadcast requests correctly.

   Here are some questions I do not found the answer:
   1. the macro ALE_BLOCKED seems be used uncorrectly.
      If one want the pkg should be "blocked", the "super" bit should be cleared, not be set.
      So, in cpsw_ale_add_mcast(), we should do like this:
          cpsw_ale_set_super(ale_entry, (flags & ALE_BLOCKED) ? 0 : 1);

      Do you think the current logic is reversed?

   2. broadcast ethernet pkg from "FF:FF:FF:FF:FF:FF" should be alowed.
      If by default, it desiged to be filtered out, why not add an interface to support
      restore it? I do not found any code that can change filtering the broadcast pkg in runtime.






--
Brock Zheng <yzheng@techyauld.com>
郑 祎

北京中科腾越科技发展有限公司
北京市海淀区东北旺西路8号中关村软件园21号楼启明星辰大厦二层六区(邮编:100094)

[-- Attachment #2: Type: application/pgp-signature, Size: 473 bytes --]

^ permalink raw reply

* [PATCH iproute2 v2] man ss: Add state filter description
From: Vadim Kochan @ 2014-12-28 22:20 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 2191 bytes --]

From: Vadim Kochan <vadim4j@gmail.com>

Stolen from generated doc/ss.html
Also added reference to RFC 793 for TCP states.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 man/man8/ss.8 | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 2a4bbc5..f4b709b 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -127,8 +127,41 @@ Do not display anything, just dump raw information about TCP sockets to FILE aft
 Read filter information from FILE.
 Each line of FILE is interpreted like single command line option. If FILE is - stdin is used.
 .TP
-.B FILTER := [ state TCP-STATE ] [ EXPRESSION ]
+.B FILTER := [ state STATE-FILTER ] [ EXPRESSION ]
 Please take a look at the official documentation (Debian package iproute-doc) for details regarding filters.
+
+.SH STATE-FILTER
+
+.B STATE-FILTER
+allows to construct arbitrary set of states to match. Its syntax is sequence of keywords state and exclude followed by identifier of state.
+.TP
+Available identifiers are:
+
+All standard TCP states:
+.BR established ", " syn-sent ", " syn-recv ", " fin-wait-1 ", " fin-wait-2 ", " time-wait ", " closed ", " close-wait ", " last-ack ", "
+.BR  listen " and " closing.
+
+.B all
+- for all the states
+
+.B connected
+- all the states except for
+.BR listen " and " closed
+
+.B synchronized
+- all the
+.B connected
+states except for
+.B syn-sent
+
+.B bucket
+- states, which are maintained as minisockets, i.e.
+.BR time-wait " and " syn-recv
+
+.B big
+- opposite to
+.B bucket
+
 .SH USAGE EXAMPLES
 .TP
 .B ss -t -a
@@ -150,7 +183,11 @@ Find all local processes connected to X server.
 List all the tcp sockets in state FIN-WAIT-1 for our apache to network 193.233.7/24 and look at their timers.
 .SH SEE ALSO
 .BR ip (8),
-.BR /usr/share/doc/iproute-doc/ss.html " (package iproute­doc)"
+.BR /usr/share/doc/iproute-doc/ss.html " (package iproute­doc)",
+.br
+.BR RFC " 793 "
+- https://tools.ietf.org/rfc/rfc793.txt (TCP states) 
+
 .SH AUTHOR
 .I ss 
 was written by Alexey Kuznetosv, <kuznet@ms2.inr.ac.ru>.
-- 
2.1.3

^ permalink raw reply related

* Re: [PATCH iproute2] man ss: Add state filter description
From: Vadim Kochan @ 2014-12-28 22:19 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan
In-Reply-To: <1419766748-4468-1-git-send-email-vadim4j@gmail.com>

On Sun, Dec 28, 2014 at 01:39:08PM +0200, Vadim Kochan wrote:
> From: Vadim Kochan <vadim4j@gmail.com>
> 
> Stolen from generated doc/ss.html
> Also added reference to RFC 739 for TCP states.
> 

Will re-send v2 because of wrong RFC number ...

^ permalink raw reply

* Re: [PATCH v2 3/4] can: kvaser_usb: Don't send a RESET_CHIP for non-existing channels
From: Olivier Sobrie @ 2014-12-28 21:54 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Oliver Hartkopp, Wolfgang Grandegger, Marc Kleine-Budde,
	David S. Miller, Paul Gortmaker, Greg KH, Linux-CAN, netdev, LKML
In-Reply-To: <20141225000256.GB24302@vivalin-002>

On Thu, Dec 25, 2014 at 02:02:56AM +0200, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> 
> "Someone reported me that recent leaf firmwares go in trouble when
> you send a command for a channel that does not exist. Instead ...
> you can move the reset command to kvaser_usb_init_one() function."

Please adapt the commit log message as follows:
Recent Leaf firmware versions (>= 3.1.557) do not allow to send commands for
non-existing channels. If a command is send for a non-existing channel,
the firmware crashes.

And you can add:
Reported-by: Christopher Storah <Christopher.Storah@invetech.com.au>
Signed-off-by: Olivier Sobrie <olivier@sobrie.be>

Kr,

Olivier

> 
> Suggested-by: Olivier Sobrie <olivier@sobrie.be>
> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> ---
>  drivers/net/can/usb/kvaser_usb.c | 7 ++++---
>  1 file changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
> index 598e251..2791501 100644
> --- a/drivers/net/can/usb/kvaser_usb.c
> +++ b/drivers/net/can/usb/kvaser_usb.c
> @@ -1505,6 +1505,10 @@ static int kvaser_usb_init_one(struct usb_interface *intf,
>  	struct kvaser_usb_net_priv *priv;
>  	int i, err;
>  
> +	err = kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, channel);
> +	if (err)
> +		return err;
> +
>  	netdev = alloc_candev(sizeof(*priv), MAX_TX_URBS);
>  	if (!netdev) {
>  		dev_err(&intf->dev, "Cannot alloc candev\n");
> @@ -1609,9 +1613,6 @@ static int kvaser_usb_probe(struct usb_interface *intf,
>  
>  	usb_set_intfdata(intf, dev);
>  
> -	for (i = 0; i < MAX_NET_DEVICES; i++)
> -		kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i);
> -
>  	err = kvaser_usb_get_software_info(dev);
>  	if (err) {
>  		dev_err(&intf->dev,

^ permalink raw reply

* Re: [PATCH v2 1/4] can: kvaser_usb: Don't free packets when tight on URBs
From: Olivier Sobrie @ 2014-12-28 21:52 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Oliver Hartkopp, Wolfgang Grandegger, Marc Kleine-Budde,
	David S. Miller, Paul Gortmaker, Linux-CAN, netdev, Greg KH,
	Linux-stable, LKML
In-Reply-To: <20141224235644.GA3778@vivalin-002>

On Thu, Dec 25, 2014 at 01:56:44AM +0200, Ahmed S. Darwish wrote:
> From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> 
> Flooding the Kvaser CAN to USB dongle with multiple reads and
> writes in high frequency caused seemingly-random panics in the
> kernel.
> 
> On further inspection, it seems the driver erroneously freed the
> to-be-transmitted packet upon getting tight on URBs and returning
> NETDEV_TX_BUSY, leading to invalid memory writes and double frees
> at a later point in time.
> 
> Note:
> 
> Finding no more URBs/transmit-contexts and returning NETDEV_TX_BUSY
> is a driver bug in and out of itself: it means that our start/stop
> queue flow control is broken.
> 
> This patch only fixes the (buggy) error handling code; the root
> cause shall be fixed in a later commit.
> 
> Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>

Acked-by: Olivier Sobrie <olivier@sobrie.be>

> ---
>  drivers/net/can/usb/kvaser_usb.c | 12 ++++++------
>  1 file changed, 6 insertions(+), 6 deletions(-)
> 
>  (Marc, Greg, I believe this should also be added to -stable?)
> 
> diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
> index 541fb7a..6479a2b 100644
> --- a/drivers/net/can/usb/kvaser_usb.c
> +++ b/drivers/net/can/usb/kvaser_usb.c
> @@ -1294,12 +1294,14 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
>  	if (!urb) {
>  		netdev_err(netdev, "No memory left for URBs\n");
>  		stats->tx_dropped++;
> -		goto nourbmem;
> +		dev_kfree_skb(skb);
> +		return NETDEV_TX_OK;
>  	}
>  
>  	buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC);
>  	if (!buf) {
>  		stats->tx_dropped++;
> +		dev_kfree_skb(skb);
>  		goto nobufmem;
>  	}
>  
> @@ -1334,6 +1336,9 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
>  		}
>  	}
>  
> +	/*
> +	 * This should never happen; it implies a flow control bug.
> +	 */
>  	if (!context) {
>  		netdev_warn(netdev, "cannot find free context\n");
>  		ret =  NETDEV_TX_BUSY;
> @@ -1364,9 +1369,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
>  	if (unlikely(err)) {
>  		can_free_echo_skb(netdev, context->echo_index);
>  
> -		skb = NULL; /* set to NULL to avoid double free in
> -			     * dev_kfree_skb(skb) */
> -
>  		atomic_dec(&priv->active_tx_urbs);
>  		usb_unanchor_urb(urb);
>  
> @@ -1388,8 +1390,6 @@ releasebuf:
>  	kfree(buf);
>  nobufmem:
>  	usb_free_urb(urb);
> -nourbmem:
> -	dev_kfree_skb(skb);
>  	return ret;
>  }
>  

-- 
Olivier

^ permalink raw reply

* Re: [PATCH] can: kvaser_usb: Add support for the Usbcan-II family
From: Olivier Sobrie @ 2014-12-28 21:51 UTC (permalink / raw)
  To: Ahmed S. Darwish
  Cc: Oliver Hartkopp, Wolfgang Grandegger, Marc Kleine-Budde,
	David S. Miller, Paul Gortmaker, Linux-CAN, netdev, LKML
In-Reply-To: <20141224150417.GB5707@vivalin-002>

Hello Ahmed,

On Wed, Dec 24, 2014 at 05:04:17PM +0200, Ahmed S. Darwish wrote:
> Hi Olivier,
> 
> On Wed, Dec 24, 2014 at 01:36:27PM +0100, Olivier Sobrie wrote:
> > Hello Ahmed,
> > 
> > On Tue, Dec 23, 2014 at 05:53:11PM +0200, Ahmed S. Darwish wrote:
> > > From: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> > > 
> > > CAN to USB interfaces sold by the Swedish manufacturer Kvaser are
> > > divided into two major families: 'Leaf', and 'UsbcanII'.  From an
> > > Operating System perspective, the firmware of both families behave
> > > in a not too drastically different fashion.
> > > 
> > > This patch adds support for the UsbcanII family of devices to the
> > > current Kvaser Leaf-only driver.
> > > 
> > > CAN frames sending, receiving, and error handling paths has been
> > > tested using the dual-channel "Kvaser USBcan II HS/LS" dongle. It
> > > should also work nicely with other products in the same category.
> > > 
> > 
> > Good, thank you :-) I'll try to test the patch during the next
> > week-end. Small remarks below.
> > 
> 
> Great! thanks and Merry Christmas :-)
> 
> > > Signed-off-by: Ahmed S. Darwish <ahmed.darwish@valeo.com>
> > > ---
> > >  drivers/net/can/usb/kvaser_usb.c | 630 +++++++++++++++++++++++++++++++--------
> > >  1 file changed, 505 insertions(+), 125 deletions(-)
> > > 
> > >  (Generated over 3.19.0-rc1 + generic bugfix at
> > >   can-kvaser_usb-Don-t-free-packets-when-tight-on-URBs.patch)
> > > 
> > > diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c
> > > index 34c35d8..e7076da 100644
> > > --- a/drivers/net/can/usb/kvaser_usb.c
> > > +++ b/drivers/net/can/usb/kvaser_usb.c
> > > @@ -6,12 +6,15 @@
> > >   * Parts of this driver are based on the following:
> > >   *  - Kvaser linux leaf driver (version 4.78)
> > >   *  - CAN driver for esd CAN-USB/2
> > > + *  - Kvaser linux usbcanII driver (version 5.3)
> > >   *
> > >   * Copyright (C) 2002-2006 KVASER AB, Sweden. All rights reserved.
> > >   * Copyright (C) 2010 Matthias Fuchs <matthias.fuchs@esd.eu>, esd gmbh
> > >   * Copyright (C) 2012 Olivier Sobrie <olivier@sobrie.be>
> > > + * Copyright (C) 2014 Valeo Corporation
> > >   */
> > >  
> > > +#include <linux/kernel.h>
> > >  #include <linux/completion.h>
> > >  #include <linux/module.h>
> > >  #include <linux/netdevice.h>
> > > @@ -21,6 +24,18 @@
> > >  #include <linux/can/dev.h>
> > >  #include <linux/can/error.h>
> > >  
> > > +#define MAX(a, b) ((a) > (b) ? (a) : (b))
> > 
> > There is a max(a, b) macro in <linux/kernel.h>.
> > 
> 
> Quite true, but it unfortunately fails when the symbol is
> used in array size declaration as in below:
> 
>         struct kvaser_usb {
>                 ...
>                 struct kvaser_usb_net_priv *nets[MAX_NET_DEVICES];
>                 ...
>         }
> 
>         include/linux/kernel.h:713:19: error: braced-group within
> 	expression allowed only inside a function
>         #define max(x, y) ({    \
>                    ^

Just let MAX_NET_DEVICES equals to 3.

> 
> > > +
> > > +/*
> > > + * Kvaser USB CAN dongles are divided into two major families:
> > > + * - Leaf: Based on Renesas M32C, running firmware labeled as 'filo'
> > > + * - UsbcanII: Based on Renesas M16C, running firmware labeled as 'helios'
> > > + */
> > > +enum kvaser_usb_family {
> > > +	KVASER_LEAF,
> > > +	KVASER_USBCAN,
> > > +};
> > > +
> > >  #define MAX_TX_URBS			16
> > >  #define MAX_RX_URBS			4
> > >  #define START_TIMEOUT			1000 /* msecs */
> > > @@ -29,9 +44,12 @@
> > >  #define USB_RECV_TIMEOUT		1000 /* msecs */
> > >  #define RX_BUFFER_SIZE			3072
> > >  #define CAN_USB_CLOCK			8000000
> > > -#define MAX_NET_DEVICES			3
> > > +#define LEAF_MAX_NET_DEVICES		3
> > > +#define USBCAN_MAX_NET_DEVICES		2
> > > +#define MAX_NET_DEVICES			MAX(LEAF_MAX_NET_DEVICES, \
> > > +					    USBCAN_MAX_NET_DEVICES)
> > >  
> > > -/* Kvaser USB devices */
> > > +/* Leaf USB devices */
> > >  #define KVASER_VENDOR_ID		0x0bfd
> > >  #define USB_LEAF_DEVEL_PRODUCT_ID	10
> > >  #define USB_LEAF_LITE_PRODUCT_ID	11
> > > @@ -55,6 +73,16 @@
> > >  #define USB_CAN_R_PRODUCT_ID		39
> > >  #define USB_LEAF_LITE_V2_PRODUCT_ID	288
> > >  #define USB_MINI_PCIE_HS_PRODUCT_ID	289
> > > +#define LEAF_PRODUCT_ID(id)		(id >= USB_LEAF_DEVEL_PRODUCT_ID && \
> > > +					 id <= USB_MINI_PCIE_HS_PRODUCT_ID)
> > > +
> > > +/* USBCANII devices */
> > > +#define USB_USBCAN_REVB_PRODUCT_ID	2
> > > +#define USB_VCI2_PRODUCT_ID		3
> > > +#define USB_USBCAN2_PRODUCT_ID		4
> > > +#define USB_MEMORATOR_PRODUCT_ID	5
> > > +#define USBCAN_PRODUCT_ID(id)		(id >= USB_USBCAN_REVB_PRODUCT_ID && \
> > > +					 id <= USB_MEMORATOR_PRODUCT_ID)
> > >  
> > >  /* USB devices features */
> > >  #define KVASER_HAS_SILENT_MODE		BIT(0)
> > > @@ -73,7 +101,7 @@
> > >  #define MSG_FLAG_TX_ACK			BIT(6)
> > >  #define MSG_FLAG_TX_REQUEST		BIT(7)
> > >  
> > > -/* Can states */
> > > +/* Can states (M16C CxSTRH register) */
> > >  #define M16C_STATE_BUS_RESET		BIT(0)
> > >  #define M16C_STATE_BUS_ERROR		BIT(4)
> > >  #define M16C_STATE_BUS_PASSIVE		BIT(5)
> > > @@ -98,7 +126,13 @@
> > >  #define CMD_START_CHIP_REPLY		27
> > >  #define CMD_STOP_CHIP			28
> > >  #define CMD_STOP_CHIP_REPLY		29
> > > -#define CMD_GET_CARD_INFO2		32
> > > +#define CMD_READ_CLOCK			30
> > > +#define CMD_READ_CLOCK_REPLY		31
> > > +
> > > +#define LEAF_CMD_GET_CARD_INFO2		32
> > > +#define USBCAN_CMD_RESET_CLOCK		32
> > > +#define USBCAN_CMD_CLOCK_OVERFLOW_EVENT	33

I would prefer if you use CMD_LEAF_xxx and CMD_USBCAN_xxx.

> > > +
> > >  #define CMD_GET_CARD_INFO		34
> > >  #define CMD_GET_CARD_INFO_REPLY		35
> > >  #define CMD_GET_SOFTWARE_INFO		38
> > > @@ -108,8 +142,9 @@
> > >  #define CMD_RESET_ERROR_COUNTER		49
> > >  #define CMD_TX_ACKNOWLEDGE		50
> > >  #define CMD_CAN_ERROR_EVENT		51
> > > -#define CMD_USB_THROTTLE		77
> > > -#define CMD_LOG_MESSAGE			106
> > > +
> > > +#define LEAF_CMD_USB_THROTTLE		77
> > > +#define LEAF_CMD_LOG_MESSAGE		106
> > >  
> > >  /* error factors */
> > >  #define M16C_EF_ACKE			BIT(0)
> > > @@ -121,6 +156,13 @@
> > >  #define M16C_EF_RCVE			BIT(6)
> > >  #define M16C_EF_TRE			BIT(7)
> > >  
> > > +/* Only Leaf-based devices can report M16C error factors,
> > > + * thus define our own error status flags for USBCAN */
> > > +#define USBCAN_ERROR_STATE_NONE		0
> > > +#define USBCAN_ERROR_STATE_TX_ERROR	BIT(0)
> > > +#define USBCAN_ERROR_STATE_RX_ERROR	BIT(1)
> > > +#define USBCAN_ERROR_STATE_BUSERROR	BIT(2)
> > > +
> > >  /* bittiming parameters */
> > >  #define KVASER_USB_TSEG1_MIN		1
> > >  #define KVASER_USB_TSEG1_MAX		16
> > > @@ -137,7 +179,7 @@
> > >  #define KVASER_CTRL_MODE_SELFRECEPTION	3
> > >  #define KVASER_CTRL_MODE_OFF		4
> > >  
> > > -/* log message */
> > > +/* Extended CAN identifier flag */
> > >  #define KVASER_EXTENDED_FRAME		BIT(31)
> > >  
> > >  struct kvaser_msg_simple {
> > > @@ -148,30 +190,55 @@ struct kvaser_msg_simple {
> > >  struct kvaser_msg_cardinfo {
> > >  	u8 tid;
> > >  	u8 nchannels;
> > > -	__le32 serial_number;
> > > -	__le32 padding;
> > > +	union {
> > > +		struct {
> > > +			__le32 serial_number;
> > > +			__le32 padding;
> > > +		} __packed leaf0;
> > > +		struct {
> > > +			__le32 serial_number_low;
> > > +			__le32 serial_number_high;
> > > +		} __packed usbcan0;
> > > +	} __packed;
> > >  	__le32 clock_resolution;
> > >  	__le32 mfgdate;
> > >  	u8 ean[8];
> > >  	u8 hw_revision;
> > > -	u8 usb_hs_mode;
> > > -	__le16 padding2;
> > > +	union {
> > > +		struct {
> > > +			u8 usb_hs_mode;
> > > +		} __packed leaf1;
> > > +		struct {
> > > +			u8 padding;
> > > +		} __packed usbcan1;
> > > +	} __packed;
> > > +	__le16 padding;
> > >  } __packed;
> > >  
> > >  struct kvaser_msg_cardinfo2 {
> > >  	u8 tid;
> > > -	u8 channel;
> > > +	u8 reserved;
> > >  	u8 pcb_id[24];
> > >  	__le32 oem_unlock_code;
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_softinfo {
> > > +struct leaf_msg_softinfo {
> > >  	u8 tid;
> > > -	u8 channel;
> > > +	u8 padding0;
> > >  	__le32 sw_options;
> > >  	__le32 fw_version;
> > >  	__le16 max_outstanding_tx;
> > > -	__le16 padding[9];
> > > +	__le16 padding1[9];
> > > +} __packed;
> > > +
> > > +struct usbcan_msg_softinfo {
> > > +	u8 tid;
> > > +	u8 fw_name[5];
> > > +	__le16 max_outstanding_tx;
> > > +	u8 padding[6];
> > > +	__le32 fw_version;
> > > +	__le16 checksum;
> > > +	__le16 sw_options;
> > >  } __packed;
> > >  
> > >  struct kvaser_msg_busparams {
> > > @@ -188,36 +255,86 @@ struct kvaser_msg_tx_can {
> > >  	u8 channel;
> > >  	u8 tid;
> > >  	u8 msg[14];
> > > -	u8 padding;
> > > -	u8 flags;
> > > +	union {
> > > +		struct {
> > > +			u8 padding;
> > > +			u8 flags;
> > > +		} __packed leaf;
> > > +		struct {
> > > +			u8 flags;
> > > +			u8 padding;
> > > +		} __packed usbcan;
> > > +	} __packed;
> > > +} __packed;
> > > +
> > > +struct kvaser_msg_rx_can_header {
> > > +	u8 channel;
> > > +	u8 flag;
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_rx_can {
> > > +struct leaf_msg_rx_can {
> > >  	u8 channel;
> > >  	u8 flag;
> > > +
> > >  	__le16 time[3];
> > >  	u8 msg[14];
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_chip_state_event {
> > > +struct usbcan_msg_rx_can {
> > > +	u8 channel;
> > > +	u8 flag;
> > > +
> > > +	u8 msg[14];
> > > +	__le16 time;
> > > +} __packed;
> > > +
> > > +struct leaf_msg_chip_state_event {
> > >  	u8 tid;
> > >  	u8 channel;
> > > +
> > >  	__le16 time[3];
> > >  	u8 tx_errors_count;
> > >  	u8 rx_errors_count;
> > > +
> > >  	u8 status;
> > >  	u8 padding[3];
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_tx_acknowledge {
> > > +struct usbcan_msg_chip_state_event {
> > > +	u8 tid;
> > > +	u8 channel;
> > > +
> > > +	u8 tx_errors_count;
> > > +	u8 rx_errors_count;
> > > +	__le16 time;
> > > +
> > > +	u8 status;
> > > +	u8 padding[3];
> > > +} __packed;
> > > +
> > > +struct kvaser_msg_tx_acknowledge_header {
> > > +	u8 channel;
> > > +	u8 tid;
> > > +};
> > > +
> > > +struct leaf_msg_tx_acknowledge {
> > >  	u8 channel;
> > >  	u8 tid;
> > > +
> > >  	__le16 time[3];
> > >  	u8 flags;
> > >  	u8 time_offset;
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_error_event {
> > > +struct usbcan_msg_tx_acknowledge {
> > > +	u8 channel;
> > > +	u8 tid;
> > > +
> > > +	__le16 time;
> > > +	__le16 padding;
> > > +} __packed;
> > > +
> > > +struct leaf_msg_error_event {
> > >  	u8 tid;
> > >  	u8 flags;
> > >  	__le16 time[3];
> > > @@ -229,6 +346,18 @@ struct kvaser_msg_error_event {
> > >  	u8 error_factor;
> > >  } __packed;
> > >  
> > > +struct usbcan_msg_error_event {
> > > +	u8 tid;
> > > +	u8 padding;
> > > +	u8 tx_errors_count_ch0;
> > > +	u8 rx_errors_count_ch0;
> > > +	u8 tx_errors_count_ch1;
> > > +	u8 rx_errors_count_ch1;
> > > +	u8 status_ch0;
> > > +	u8 status_ch1;
> > > +	__le16 time;
> > > +} __packed;
> > > +
> > >  struct kvaser_msg_ctrl_mode {
> > >  	u8 tid;
> > >  	u8 channel;
> > > @@ -243,7 +372,7 @@ struct kvaser_msg_flush_queue {
> > >  	u8 padding[3];
> > >  } __packed;
> > >  
> > > -struct kvaser_msg_log_message {
> > > +struct leaf_msg_log_message {
> > >  	u8 channel;
> > >  	u8 flags;
> > >  	__le16 time[3];
> > > @@ -260,19 +389,49 @@ struct kvaser_msg {
> > >  		struct kvaser_msg_simple simple;
> > >  		struct kvaser_msg_cardinfo cardinfo;
> > >  		struct kvaser_msg_cardinfo2 cardinfo2;
> > > -		struct kvaser_msg_softinfo softinfo;
> > >  		struct kvaser_msg_busparams busparams;
> > > +
> > > +		struct kvaser_msg_rx_can_header rx_can_header;
> > > +		struct kvaser_msg_tx_acknowledge_header tx_acknowledge_header;
> > > +
> > > +		union {
> > > +			struct leaf_msg_softinfo softinfo;
> > > +			struct leaf_msg_rx_can rx_can;
> > > +			struct leaf_msg_chip_state_event chip_state_event;
> > > +			struct leaf_msg_tx_acknowledge tx_acknowledge;
> > > +			struct leaf_msg_error_event error_event;
> > > +			struct leaf_msg_log_message log_message;
> > > +		} __packed leaf;
> > > +
> > > +		union {
> > > +			struct usbcan_msg_softinfo softinfo;
> > > +			struct usbcan_msg_rx_can rx_can;
> > > +			struct usbcan_msg_chip_state_event chip_state_event;
> > > +			struct usbcan_msg_tx_acknowledge tx_acknowledge;
> > > +			struct usbcan_msg_error_event error_event;
> > > +		} __packed usbcan;
> > > +
> > >  		struct kvaser_msg_tx_can tx_can;
> > > -		struct kvaser_msg_rx_can rx_can;
> > > -		struct kvaser_msg_chip_state_event chip_state_event;
> > > -		struct kvaser_msg_tx_acknowledge tx_acknowledge;
> > > -		struct kvaser_msg_error_event error_event;
> > >  		struct kvaser_msg_ctrl_mode ctrl_mode;
> > >  		struct kvaser_msg_flush_queue flush_queue;
> > > -		struct kvaser_msg_log_message log_message;
> > >  	} u;
> > >  } __packed;
> > >  
> > > +/* Leaf/USBCAN-agnostic summary of an error event.
> > > + * No M16C error factors for USBCAN-based devices. */
> > > +struct kvaser_error_summary {
> > > +	u8 channel, status, txerr, rxerr;
> > > +	union {
> > > +		struct {
> > > +			u8 error_factor;
> > > +		} leaf;
> > > +		struct {
> > > +			u8 other_ch_status;
> > > +			u8 error_state;
> > > +		} usbcan;
> > > +	};
> > > +};
> > > +
> > >  struct kvaser_usb_tx_urb_context {
> > >  	struct kvaser_usb_net_priv *priv;
> > >  	u32 echo_index;
> > > @@ -288,6 +447,8 @@ struct kvaser_usb {
> > >  
> > >  	u32 fw_version;
> > >  	unsigned int nchannels;
> > > +	enum kvaser_usb_family family;
> > > +	unsigned int max_channels;
> > >  
> > >  	bool rxinitdone;
> > >  	void *rxbuf[MAX_RX_URBS];
> > > @@ -311,6 +472,7 @@ struct kvaser_usb_net_priv {
> > >  };
> > >  
> > >  static const struct usb_device_id kvaser_usb_table[] = {
> > > +	/* Leaf family IDs */
> > >  	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
> > >  	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
> > >  	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
> > > @@ -360,6 +522,17 @@ static const struct usb_device_id kvaser_usb_table[] = {
> > >  		.driver_info = KVASER_HAS_TXRX_ERRORS },
> > >  	{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
> > >  	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
> > > +
> > > +	/* USBCANII family IDs */
> > > +	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
> > > +		.driver_info = KVASER_HAS_TXRX_ERRORS },
> > > +	{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
> > > +		.driver_info = KVASER_HAS_TXRX_ERRORS },
> > > +	{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
> > > +		.driver_info = KVASER_HAS_TXRX_ERRORS },
> > > +	{ USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
> > > +		.driver_info = KVASER_HAS_TXRX_ERRORS },
> > > +
> > >  	{ }
> > >  };
> > >  MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
> > > @@ -463,7 +636,18 @@ static int kvaser_usb_get_software_info(struct kvaser_usb *dev)
> > >  	if (err)
> > >  		return err;
> > >  
> > > -	dev->fw_version = le32_to_cpu(msg.u.softinfo.fw_version);
> > > +	switch (dev->family) {
> > > +	case KVASER_LEAF:
> > > +		dev->fw_version = le32_to_cpu(msg.u.leaf.softinfo.fw_version);
> > > +		break;
> > > +	case KVASER_USBCAN:
> > > +		dev->fw_version = le32_to_cpu(msg.u.usbcan.softinfo.fw_version);
> > > +		break;
> > > +	default:
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid device family (%d)\n", dev->family);
> > > +		return -EINVAL;
> > > +	}
> > >  
> > >  	return 0;
> > >  }
> > > @@ -482,7 +666,7 @@ static int kvaser_usb_get_card_info(struct kvaser_usb *dev)
> > >  		return err;
> > >  
> > >  	dev->nchannels = msg.u.cardinfo.nchannels;
> > > -	if (dev->nchannels > MAX_NET_DEVICES)
> > > +	if (dev->nchannels > dev->max_channels)
> > >  		return -EINVAL;
> > 
> > IMHO, you can keep MAX_NET_DEVICES here.
> > 
> 
> The UsbcanII firmware hardcodes a maximum of 2 channels in
> its protocol. This is unfortunately due to its inability to
> tell whether an error event is from CAN channel 0 or ch 1,
> and also due to its error_event format:
> 
> struct usbcan_msg_error_event {
> 	u8 tid;
> 	u8 padding;
> 	u8 tx_errors_count_ch0;
> 	u8 rx_errors_count_ch0;
> 	u8 tx_errors_count_ch1;
> 	u8 rx_errors_count_ch1;
> 	u8 status_ch0;
> 	u8 status_ch1;
> 	__le16 time;
> } __packed;
> 
> But since we have MAX_NET_DEVICES = 3, and given the above,
> if the UsbcanII firmware reported to us having more than 2
> channels, then it's:
> 
> a) most probably a memory corruption bug either in the firmware
>    or in the driver
> b) an updated device/firmware we cannot support yet, since
>    we cannot arbitrate the origin of error events quite correctly
>    (especially in the case of CAN_ERR_BUSERROR, where the error
>    counters stays the same and we have to resort to other hacks.
>    Kindly check usbcan_report_error_if_applicable().)
> 
> So allowing more than 2 channels given the current set of
> affairs will really induce correctness problems :-(
> 
> > >  
> > >  	return 0;
> > > @@ -496,8 +680,10 @@ static void kvaser_usb_tx_acknowledge(const struct kvaser_usb *dev,
> > >  	struct kvaser_usb_net_priv *priv;
> > >  	struct sk_buff *skb;
> > >  	struct can_frame *cf;
> > > -	u8 channel = msg->u.tx_acknowledge.channel;
> > > -	u8 tid = msg->u.tx_acknowledge.tid;
> > > +	u8 channel, tid;
> > > +
> > > +	channel = msg->u.tx_acknowledge_header.channel;
> > > +	tid = msg->u.tx_acknowledge_header.tid;
> > >  
> > >  	if (channel >= dev->nchannels) {
> > >  		dev_err(dev->udev->dev.parent,
> > > @@ -615,37 +801,83 @@ static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv)
> > >  		priv->tx_contexts[i].echo_index = MAX_TX_URBS;
> > >  }
> > >  
> > > -static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > > -				const struct kvaser_msg *msg)
> > > +static void kvaser_report_error_event(const struct kvaser_usb *dev,
> > > +				      struct kvaser_error_summary *es);
> > > +
> > > +/*
> > > + * Report error to userspace iff the controller's errors counter has
> > > + * increased, or we're the only channel seeing the bus error state.
> > > + *
> > > + * As reported by USBCAN sheets, "the CAN controller has difficulties
> > > + * to tell whether an error frame arrived on channel 1 or on channel 2."
> > > + * Thus, error counters are compared with their earlier values to
> > > + * determine which channel was responsible for the error event.
> > > + */
> > > +static void usbcan_report_error_if_applicable(const struct kvaser_usb *dev,
> > > +					      struct kvaser_error_summary *es)
> > >  {
> > > -	struct can_frame *cf;
> > > -	struct sk_buff *skb;
> > > -	struct net_device_stats *stats;
> > >  	struct kvaser_usb_net_priv *priv;
> > > -	unsigned int new_state;
> > > -	u8 channel, status, txerr, rxerr, error_factor;
> > > +	int old_tx_err_count, old_rx_err_count, channel, report_error;
> > > +
> > > +	channel = es->channel;
> > > +	if (channel >= dev->nchannels) {
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid channel number (%d)\n", channel);
> > > +		return;
> > > +	}
> > > +
> > > +	priv = dev->nets[channel];
> > > +	old_tx_err_count = priv->bec.txerr;
> > > +	old_rx_err_count = priv->bec.rxerr;
> > > +
> > > +	report_error = 0;
> > > +	if (es->txerr > old_tx_err_count) {
> > > +		es->usbcan.error_state |= USBCAN_ERROR_STATE_TX_ERROR;
> > > +		report_error = 1;
> > > +	}
> > > +	if (es->rxerr > old_rx_err_count) {
> > > +		es->usbcan.error_state |= USBCAN_ERROR_STATE_RX_ERROR;
> > > +		report_error = 1;
> > > +	}
> > > +	if ((es->status & M16C_STATE_BUS_ERROR) &&
> > > +	    !(es->usbcan.other_ch_status & M16C_STATE_BUS_ERROR)) {
> > > +		es->usbcan.error_state |= USBCAN_ERROR_STATE_BUSERROR;
> > > +		report_error = 1;
> > > +	}
> > > +
> > > +	if (report_error)
> > > +		kvaser_report_error_event(dev, es);
> > > +}
> > > +
> > > +/*
> > > + * Extract error summary from a Leaf-based device error message
> > > + */
> > > +static void leaf_extract_error_from_msg(const struct kvaser_usb *dev,
> > > +					const struct kvaser_msg *msg)
> > > +{
> > > +	struct kvaser_error_summary es = { 0, };
> > >  
> > >  	switch (msg->id) {
> > >  	case CMD_CAN_ERROR_EVENT:
> > > -		channel = msg->u.error_event.channel;
> > > -		status =  msg->u.error_event.status;
> > > -		txerr = msg->u.error_event.tx_errors_count;
> > > -		rxerr = msg->u.error_event.rx_errors_count;
> > > -		error_factor = msg->u.error_event.error_factor;
> > > +		es.channel = msg->u.leaf.error_event.channel;
> > > +		es.status =  msg->u.leaf.error_event.status;
> > > +		es.txerr = msg->u.leaf.error_event.tx_errors_count;
> > > +		es.rxerr = msg->u.leaf.error_event.rx_errors_count;
> > > +		es.leaf.error_factor = msg->u.leaf.error_event.error_factor;
> > >  		break;
> > > -	case CMD_LOG_MESSAGE:
> > > -		channel = msg->u.log_message.channel;
> > > -		status = msg->u.log_message.data[0];
> > > -		txerr = msg->u.log_message.data[2];
> > > -		rxerr = msg->u.log_message.data[3];
> > > -		error_factor = msg->u.log_message.data[1];
> > > +	case LEAF_CMD_LOG_MESSAGE:
> > > +		es.channel = msg->u.leaf.log_message.channel;
> > > +		es.status = msg->u.leaf.log_message.data[0];
> > > +		es.txerr = msg->u.leaf.log_message.data[2];
> > > +		es.rxerr = msg->u.leaf.log_message.data[3];
> > > +		es.leaf.error_factor = msg->u.leaf.log_message.data[1];
> > >  		break;
> > >  	case CMD_CHIP_STATE_EVENT:
> > > -		channel = msg->u.chip_state_event.channel;
> > > -		status =  msg->u.chip_state_event.status;
> > > -		txerr = msg->u.chip_state_event.tx_errors_count;
> > > -		rxerr = msg->u.chip_state_event.rx_errors_count;
> > > -		error_factor = 0;
> > > +		es.channel = msg->u.leaf.chip_state_event.channel;
> > > +		es.status =  msg->u.leaf.chip_state_event.status;
> > > +		es.txerr = msg->u.leaf.chip_state_event.tx_errors_count;
> > > +		es.rxerr = msg->u.leaf.chip_state_event.rx_errors_count;
> > > +		es.leaf.error_factor = 0;
> > >  		break;
> > >  	default:
> > >  		dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
> > > @@ -653,16 +885,92 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  		return;
> > >  	}
> > >  
> > > -	if (channel >= dev->nchannels) {
> > > +	kvaser_report_error_event(dev, &es);
> > > +}
> > > +
> > > +/*
> > > + * Extract summary from a USBCANII-based device error message.
> > > + */
> > > +static void usbcan_extract_error_from_msg(const struct kvaser_usb *dev,
> > > +					const struct kvaser_msg *msg)
> > > +{
> > > +	struct kvaser_error_summary es = { 0, };
> > > +
> > > +	switch (msg->id) {
> > > +
> > > +	/* Sometimes errors are sent as unsolicited chip state events */
> > > +	case CMD_CHIP_STATE_EVENT:
> > > +		es.channel = msg->u.usbcan.chip_state_event.channel;
> > > +		es.status =  msg->u.usbcan.chip_state_event.status;
> > > +		es.txerr = msg->u.usbcan.chip_state_event.tx_errors_count;
> > > +		es.rxerr = msg->u.usbcan.chip_state_event.rx_errors_count;
> > > +		usbcan_report_error_if_applicable(dev, &es);
> > > +		break;
> > > +
> > > +	case CMD_CAN_ERROR_EVENT:
> > > +		es.channel = 0;
> > > +		es.status = msg->u.usbcan.error_event.status_ch0;
> > > +		es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch0;
> > > +		es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch0;
> > > +		es.usbcan.other_ch_status =
> > > +			msg->u.usbcan.error_event.status_ch1;
> > > +		usbcan_report_error_if_applicable(dev, &es);
> > > +
> > > +		/* For error events, the USBCAN firmware does not support
> > > +		 * more than 2 channels: ch0, and ch1. */
> > > +		if (dev->nchannels > 1) {
> > > +			es.channel = 1;
> > > +			es.status = msg->u.usbcan.error_event.status_ch1;
> > > +			es.txerr = msg->u.usbcan.error_event.tx_errors_count_ch1;
> > > +			es.rxerr = msg->u.usbcan.error_event.rx_errors_count_ch1;
> > > +			es.usbcan.other_ch_status =
> > > +				msg->u.usbcan.error_event.status_ch0;
> > > +			usbcan_report_error_if_applicable(dev, &es);
> > > +		}
> > > +		break;
> > > +
> > > +	default:
> > > +		dev_err(dev->udev->dev.parent, "Invalid msg id (%d)\n",
> > > +			msg->id);
> > > +	}
> > > +}
> > > +
> > > +static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > > +				const struct kvaser_msg *msg)
> > > +{
> > > +	switch (dev->family) {
> > > +	case KVASER_LEAF:
> > > +		leaf_extract_error_from_msg(dev, msg);
> > > +		break;
> > > +	case KVASER_USBCAN:
> > > +		usbcan_extract_error_from_msg(dev, msg);
> > > +		break;
> > > +	default:
> > >  		dev_err(dev->udev->dev.parent,
> > > -			"Invalid channel number (%d)\n", channel);
> > > +			"Invalid device family (%d)\n", dev->family);
> > >  		return;
> > >  	}
> > > +}
> > >  
> > > -	priv = dev->nets[channel];
> > > +static void kvaser_report_error_event(const struct kvaser_usb *dev,
> > > +				      struct kvaser_error_summary *es)
> > > +{
> > > +	struct can_frame *cf;
> > > +	struct sk_buff *skb;
> > > +	struct net_device_stats *stats;
> > > +	struct kvaser_usb_net_priv *priv;
> > > +	unsigned int new_state;
> > > +
> > > +	if (es->channel >= dev->nchannels) {
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid channel number (%d)\n", es->channel);
> > > +		return;
> > > +	}
> > > +
> > > +	priv = dev->nets[es->channel];
> > >  	stats = &priv->netdev->stats;
> > >  
> > > -	if (status & M16C_STATE_BUS_RESET) {
> > > +	if (es->status & M16C_STATE_BUS_RESET) {
> > >  		kvaser_usb_unlink_tx_urbs(priv);
> > >  		return;
> > >  	}
> > > @@ -675,9 +983,9 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  
> > >  	new_state = priv->can.state;
> > >  
> > > -	netdev_dbg(priv->netdev, "Error status: 0x%02x\n", status);
> > > +	netdev_dbg(priv->netdev, "Error status: 0x%02x\n", es->status);
> > >  
> > > -	if (status & M16C_STATE_BUS_OFF) {
> > > +	if (es->status & M16C_STATE_BUS_OFF) {
> > >  		cf->can_id |= CAN_ERR_BUSOFF;
> > >  
> > >  		priv->can.can_stats.bus_off++;
> > > @@ -687,12 +995,12 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  		netif_carrier_off(priv->netdev);
> > >  
> > >  		new_state = CAN_STATE_BUS_OFF;
> > > -	} else if (status & M16C_STATE_BUS_PASSIVE) {
> > > +	} else if (es->status & M16C_STATE_BUS_PASSIVE) {
> > >  		if (priv->can.state != CAN_STATE_ERROR_PASSIVE) {
> > >  			cf->can_id |= CAN_ERR_CRTL;
> > >  
> > > -			if (txerr || rxerr)
> > > -				cf->data[1] = (txerr > rxerr)
> > > +			if (es->txerr || es->rxerr)
> > > +				cf->data[1] = (es->txerr > es->rxerr)
> > >  						? CAN_ERR_CRTL_TX_PASSIVE
> > >  						: CAN_ERR_CRTL_RX_PASSIVE;
> > >  			else
> > > @@ -703,13 +1011,11 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  		}
> > >  
> > >  		new_state = CAN_STATE_ERROR_PASSIVE;
> > > -	}
> > > -
> > > -	if (status == M16C_STATE_BUS_ERROR) {
> > > +	} else if (es->status & M16C_STATE_BUS_ERROR) {
> > >  		if ((priv->can.state < CAN_STATE_ERROR_WARNING) &&
> > > -		    ((txerr >= 96) || (rxerr >= 96))) {
> > > +		    ((es->txerr >= 96) || (es->rxerr >= 96))) {
> > >  			cf->can_id |= CAN_ERR_CRTL;
> > > -			cf->data[1] = (txerr > rxerr)
> > > +			cf->data[1] = (es->txerr > es->rxerr)
> > >  					? CAN_ERR_CRTL_TX_WARNING
> > >  					: CAN_ERR_CRTL_RX_WARNING;
> > >  
> > > @@ -723,7 +1029,7 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  		}
> > >  	}
> > >  
> > > -	if (!status) {
> > > +	if (!es->status) {
> > >  		cf->can_id |= CAN_ERR_PROT;
> > >  		cf->data[2] = CAN_ERR_PROT_ACTIVE;
> > >  
> > > @@ -739,34 +1045,52 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  		priv->can.can_stats.restarts++;
> > >  	}
> > >  
> > > -	if (error_factor) {
> > > -		priv->can.can_stats.bus_error++;
> > > -		stats->rx_errors++;
> > > -
> > > -		cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
> > > -
> > > -		if (error_factor & M16C_EF_ACKE)
> > > -			cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
> > > -		if (error_factor & M16C_EF_CRCE)
> > > -			cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
> > > -					CAN_ERR_PROT_LOC_CRC_DEL);
> > > -		if (error_factor & M16C_EF_FORME)
> > > -			cf->data[2] |= CAN_ERR_PROT_FORM;
> > > -		if (error_factor & M16C_EF_STFE)
> > > -			cf->data[2] |= CAN_ERR_PROT_STUFF;
> > > -		if (error_factor & M16C_EF_BITE0)
> > > -			cf->data[2] |= CAN_ERR_PROT_BIT0;
> > > -		if (error_factor & M16C_EF_BITE1)
> > > -			cf->data[2] |= CAN_ERR_PROT_BIT1;
> > > -		if (error_factor & M16C_EF_TRE)
> > > -			cf->data[2] |= CAN_ERR_PROT_TX;
> > > +	switch (dev->family) {
> > > +	case KVASER_LEAF:
> > > +		if (es->leaf.error_factor) {
> > > +			priv->can.can_stats.bus_error++;
> > > +			stats->rx_errors++;
> > > +
> > > +			cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
> > > +
> > > +			if (es->leaf.error_factor & M16C_EF_ACKE)
> > > +				cf->data[3] |= (CAN_ERR_PROT_LOC_ACK);
> > > +			if (es->leaf.error_factor & M16C_EF_CRCE)
> > > +				cf->data[3] |= (CAN_ERR_PROT_LOC_CRC_SEQ |
> > > +						CAN_ERR_PROT_LOC_CRC_DEL);
> > > +			if (es->leaf.error_factor & M16C_EF_FORME)
> > > +				cf->data[2] |= CAN_ERR_PROT_FORM;
> > > +			if (es->leaf.error_factor & M16C_EF_STFE)
> > > +				cf->data[2] |= CAN_ERR_PROT_STUFF;
> > > +			if (es->leaf.error_factor & M16C_EF_BITE0)
> > > +				cf->data[2] |= CAN_ERR_PROT_BIT0;
> > > +			if (es->leaf.error_factor & M16C_EF_BITE1)
> > > +				cf->data[2] |= CAN_ERR_PROT_BIT1;
> > > +			if (es->leaf.error_factor & M16C_EF_TRE)
> > > +				cf->data[2] |= CAN_ERR_PROT_TX;
> > > +		}
> > > +		break;
> > > +	case KVASER_USBCAN:
> > > +		if (es->usbcan.error_state & USBCAN_ERROR_STATE_TX_ERROR)
> > > +			stats->tx_errors++;
> > > +		if (es->usbcan.error_state & USBCAN_ERROR_STATE_RX_ERROR)
> > > +			stats->rx_errors++;
> > > +		if (es->usbcan.error_state & USBCAN_ERROR_STATE_BUSERROR) {
> > > +			priv->can.can_stats.bus_error++;
> > > +			cf->can_id |= CAN_ERR_BUSERROR;
> > > +		}
> > > +		break;
> > > +	default:
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid device family (%d)\n", dev->family);
> > > +		goto err;
> > >  	}
> > >  
> > > -	cf->data[6] = txerr;
> > > -	cf->data[7] = rxerr;
> > > +	cf->data[6] = es->txerr;
> > > +	cf->data[7] = es->rxerr;
> > >  
> > > -	priv->bec.txerr = txerr;
> > > -	priv->bec.rxerr = rxerr;
> > > +	priv->bec.txerr = es->txerr;
> > > +	priv->bec.rxerr = es->rxerr;
> > >  
> > >  	priv->can.state = new_state;
> > >  
> > > @@ -774,6 +1098,11 @@ static void kvaser_usb_rx_error(const struct kvaser_usb *dev,
> > >  
> > >  	stats->rx_packets++;
> > >  	stats->rx_bytes += cf->can_dlc;
> > > +
> > > +	return;
> > > +
> > > +err:
> > > +	dev_kfree_skb(skb);
> > >  }
> > >  
> > >  static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
> > > @@ -783,16 +1112,16 @@ static void kvaser_usb_rx_can_err(const struct kvaser_usb_net_priv *priv,
> > >  	struct sk_buff *skb;
> > >  	struct net_device_stats *stats = &priv->netdev->stats;
> > >  
> > > -	if (msg->u.rx_can.flag & (MSG_FLAG_ERROR_FRAME |
> > > +	if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
> > >  					 MSG_FLAG_NERR)) {
> > >  		netdev_err(priv->netdev, "Unknow error (flags: 0x%02x)\n",
> > > -			   msg->u.rx_can.flag);
> > > +			   msg->u.rx_can_header.flag);
> > >  
> > >  		stats->rx_errors++;
> > >  		return;
> > >  	}
> > >  
> > > -	if (msg->u.rx_can.flag & MSG_FLAG_OVERRUN) {
> > > +	if (msg->u.rx_can_header.flag & MSG_FLAG_OVERRUN) {
> > >  		skb = alloc_can_err_skb(priv->netdev, &cf);
> > >  		if (!skb) {
> > >  			stats->rx_dropped++;
> > > @@ -819,7 +1148,8 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> > >  	struct can_frame *cf;
> > >  	struct sk_buff *skb;
> > >  	struct net_device_stats *stats;
> > > -	u8 channel = msg->u.rx_can.channel;
> > > +	u8 channel = msg->u.rx_can_header.channel;
> > > +	const u8 *rx_msg;
> > >  
> > >  	if (channel >= dev->nchannels) {
> > >  		dev_err(dev->udev->dev.parent,
> > > @@ -830,19 +1160,32 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> > >  	priv = dev->nets[channel];
> > >  	stats = &priv->netdev->stats;
> > >  
> > > -	if ((msg->u.rx_can.flag & MSG_FLAG_ERROR_FRAME) &&
> > > -	    (msg->id == CMD_LOG_MESSAGE)) {
> > > +	if ((msg->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
> > > +	    (dev->family == KVASER_LEAF && msg->id == LEAF_CMD_LOG_MESSAGE)) {
> > >  		kvaser_usb_rx_error(dev, msg);
> > >  		return;
> > > -	} else if (msg->u.rx_can.flag & (MSG_FLAG_ERROR_FRAME |
> > > -					 MSG_FLAG_NERR |
> > > -					 MSG_FLAG_OVERRUN)) {
> > > +	} else if (msg->u.rx_can_header.flag & (MSG_FLAG_ERROR_FRAME |
> > > +						MSG_FLAG_NERR |
> > > +						MSG_FLAG_OVERRUN)) {
> > >  		kvaser_usb_rx_can_err(priv, msg);
> > >  		return;
> > > -	} else if (msg->u.rx_can.flag & ~MSG_FLAG_REMOTE_FRAME) {
> > > +	} else if (msg->u.rx_can_header.flag & ~MSG_FLAG_REMOTE_FRAME) {
> > >  		netdev_warn(priv->netdev,
> > >  			    "Unhandled frame (flags: 0x%02x)",
> > > -			    msg->u.rx_can.flag);
> > > +			    msg->u.rx_can_header.flag);
> > > +		return;
> > > +	}
> > > +
> > > +	switch (dev->family) {
> > > +	case KVASER_LEAF:
> > > +		rx_msg = msg->u.leaf.rx_can.msg;
> > > +		break;
> > > +	case KVASER_USBCAN:
> > > +		rx_msg = msg->u.usbcan.rx_can.msg;
> > > +		break;
> > > +	default:
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid device family (%d)\n", dev->family);
> > >  		return;
> > >  	}
> > >  
> > > @@ -852,38 +1195,37 @@ static void kvaser_usb_rx_can_msg(const struct kvaser_usb *dev,
> > >  		return;
> > >  	}
> > >  
> > > -	if (msg->id == CMD_LOG_MESSAGE) {
> > > -		cf->can_id = le32_to_cpu(msg->u.log_message.id);
> > > +	if (dev->family == KVASER_LEAF && msg->id == LEAF_CMD_LOG_MESSAGE) {
> > > +		cf->can_id = le32_to_cpu(msg->u.leaf.log_message.id);
> > >  		if (cf->can_id & KVASER_EXTENDED_FRAME)
> > >  			cf->can_id &= CAN_EFF_MASK | CAN_EFF_FLAG;
> > >  		else
> > >  			cf->can_id &= CAN_SFF_MASK;
> > >  
> > > -		cf->can_dlc = get_can_dlc(msg->u.log_message.dlc);
> > > +		cf->can_dlc = get_can_dlc(msg->u.leaf.log_message.dlc);
> > >  
> > > -		if (msg->u.log_message.flags & MSG_FLAG_REMOTE_FRAME)
> > > +		if (msg->u.leaf.log_message.flags & MSG_FLAG_REMOTE_FRAME)
> > >  			cf->can_id |= CAN_RTR_FLAG;
> > >  		else
> > > -			memcpy(cf->data, &msg->u.log_message.data,
> > > +			memcpy(cf->data, &msg->u.leaf.log_message.data,
> > >  			       cf->can_dlc);
> > >  	} else {
> > > -		cf->can_id = ((msg->u.rx_can.msg[0] & 0x1f) << 6) |
> > > -			     (msg->u.rx_can.msg[1] & 0x3f);
> > > +		cf->can_id = ((rx_msg[0] & 0x1f) << 6) | (rx_msg[1] & 0x3f);
> > >  
> > >  		if (msg->id == CMD_RX_EXT_MESSAGE) {
> > >  			cf->can_id <<= 18;
> > > -			cf->can_id |= ((msg->u.rx_can.msg[2] & 0x0f) << 14) |
> > > -				      ((msg->u.rx_can.msg[3] & 0xff) << 6) |
> > > -				      (msg->u.rx_can.msg[4] & 0x3f);
> > > +			cf->can_id |= ((rx_msg[2] & 0x0f) << 14) |
> > > +				      ((rx_msg[3] & 0xff) << 6) |
> > > +				      (rx_msg[4] & 0x3f);
> > >  			cf->can_id |= CAN_EFF_FLAG;
> > >  		}
> > >  
> > > -		cf->can_dlc = get_can_dlc(msg->u.rx_can.msg[5]);
> > > +		cf->can_dlc = get_can_dlc(rx_msg[5]);
> > >  
> > > -		if (msg->u.rx_can.flag & MSG_FLAG_REMOTE_FRAME)
> > > +		if (msg->u.rx_can_header.flag & MSG_FLAG_REMOTE_FRAME)
> > >  			cf->can_id |= CAN_RTR_FLAG;
> > >  		else
> > > -			memcpy(cf->data, &msg->u.rx_can.msg[6],
> > > +			memcpy(cf->data, &rx_msg[6],
> > >  			       cf->can_dlc);
> > >  	}
> > >  
> > > @@ -947,7 +1289,12 @@ static void kvaser_usb_handle_message(const struct kvaser_usb *dev,
> > >  
> > >  	case CMD_RX_STD_MESSAGE:
> > >  	case CMD_RX_EXT_MESSAGE:
> > > -	case CMD_LOG_MESSAGE:
> > > +		kvaser_usb_rx_can_msg(dev, msg);
> > > +		break;
> > > +
> > > +	case LEAF_CMD_LOG_MESSAGE:
> > > +		if (dev->family != KVASER_LEAF)
> > > +			goto warn;
> > >  		kvaser_usb_rx_can_msg(dev, msg);
> > >  		break;
> > >  
> > > @@ -960,8 +1307,14 @@ static void kvaser_usb_handle_message(const struct kvaser_usb *dev,
> > >  		kvaser_usb_tx_acknowledge(dev, msg);
> > >  		break;
> > >  
> > > +	/* Ignored messages */
> > > +	case USBCAN_CMD_CLOCK_OVERFLOW_EVENT:
> > > +		if (dev->family != KVASER_USBCAN)
> > > +			goto warn;
> > > +		break;
> > > +
> > >  	default:
> > > -		dev_warn(dev->udev->dev.parent,
> > > +warn:		dev_warn(dev->udev->dev.parent,
> > >  			 "Unhandled message (%d)\n", msg->id);
> > >  		break;
> > >  	}
> > > @@ -1181,7 +1534,7 @@ static void kvaser_usb_unlink_all_urbs(struct kvaser_usb *dev)
> > >  				  dev->rxbuf[i],
> > >  				  dev->rxbuf_dma[i]);
> > >  
> > > -	for (i = 0; i < MAX_NET_DEVICES; i++) {
> > > +	for (i = 0; i < dev->max_channels; i++) {
> > 
> > here too... or replace it by nchannels.
> > 
> 
> Yes, indeed. nchannels is the correct choice here, especially since
> kvaser_usb_init_one() is called "dev->nchannels" times too.
> 
> > >  		struct kvaser_usb_net_priv *priv = dev->nets[i];
> > >  
> > >  		if (priv)
> > > @@ -1286,6 +1639,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
> > >  	struct kvaser_msg *msg;
> > >  	int i, err;
> > >  	int ret = NETDEV_TX_OK;
> > > +	uint8_t *msg_tx_can_flags;
> > >  	bool kfree_skb_on_error = true;
> > >  
> > >  	if (can_dropped_invalid_skb(netdev, skb))
> > > @@ -1306,9 +1660,23 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
> > >  
> > >  	msg = buf;
> > >  	msg->len = MSG_HEADER_LEN + sizeof(struct kvaser_msg_tx_can);
> > > -	msg->u.tx_can.flags = 0;
> > >  	msg->u.tx_can.channel = priv->channel;
> > >  
> > > +	switch (dev->family) {
> > > +	case KVASER_LEAF:
> > > +		msg_tx_can_flags = &msg->u.tx_can.leaf.flags;
> > > +		break;
> > > +	case KVASER_USBCAN:
> > > +		msg_tx_can_flags = &msg->u.tx_can.usbcan.flags;
> > > +		break;
> > > +	default:
> > > +		dev_err(dev->udev->dev.parent,
> > > +			"Invalid device family (%d)\n", dev->family);
> > > +		goto releasebuf;
> > > +	}
> > > +
> > > +	*msg_tx_can_flags = 0;
> > > +
> > >  	if (cf->can_id & CAN_EFF_FLAG) {
> > >  		msg->id = CMD_TX_EXT_MESSAGE;
> > >  		msg->u.tx_can.msg[0] = (cf->can_id >> 24) & 0x1f;
> > > @@ -1326,7 +1694,7 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb,
> > >  	memcpy(&msg->u.tx_can.msg[6], cf->data, cf->can_dlc);
> > >  
> > >  	if (cf->can_id & CAN_RTR_FLAG)
> > > -		msg->u.tx_can.flags |= MSG_FLAG_REMOTE_FRAME;
> > > +		*msg_tx_can_flags |= MSG_FLAG_REMOTE_FRAME;
> > >  
> > >  	for (i = 0; i < ARRAY_SIZE(priv->tx_contexts); i++) {
> > >  		if (priv->tx_contexts[i].echo_index == MAX_TX_URBS) {
> > > @@ -1596,6 +1964,18 @@ static int kvaser_usb_probe(struct usb_interface *intf,
> > >  	if (!dev)
> > >  		return -ENOMEM;
> > >  
> > > +	if (LEAF_PRODUCT_ID(id->idProduct)) {
> > > +		dev->family = KVASER_LEAF;
> > > +		dev->max_channels = LEAF_MAX_NET_DEVICES;
> > > +	} else if (USBCAN_PRODUCT_ID(id->idProduct)) {
> > > +		dev->family = KVASER_USBCAN;
> > > +		dev->max_channels = USBCAN_MAX_NET_DEVICES;
> > > +	} else {
> > > +		dev_err(&intf->dev, "Product ID (%d) does not belong to any "
> > > +				    "known Kvaser USB family", id->idProduct);
> > > +		return -ENODEV;
> > > +	}
> > > +
> > 
> > Is it really required to keep max_channels in the kvaser_usb structure?
> > If I looked correctly, you use this variable as a replacement for
> > MAX_NET_DEVICES in the code and MAX_NET_DEVICES is only used in probe
> > and disconnect functions. I think it can even be replaced by nchannels
> > in the disconnect path. So I also think that it don't need to be in the
> > kvaser_usb structure.
> > 
> 
> hmmm.. given the current state of error arbitration explained
> above, where I cannot accept a dev->nchannels > 2, I guess we
> have two options:
> 
> a) Remove max_channels, and hardcode the channels count
> correctness logic as follows:
> 
>         dev->nchannels = msg.u.cardinfo.nchannels;
>         if ((dev->family == USBCAN && dev->nchannels > USBCAN_MAX_NET_DEVICES)
>             || (dev->family == LEAF && dev->nchannels > LEAF_MAX_NET_DEVICES))
>                 return -EINVAL
> 
> b) Leave max_channels in 'struct kvaser_usb' as is.
> 
> I personally prefer the solution at 'b)' but I can do it as
> in 'a)' if you prefer :-)

Keeping max_channels in the kvaser_usb structure is useless because it
is only used in one function that is called in the probe function.

I would prefer to have:
	if (dev->nchannels > MAX_NET_DEVICES)
		return -EINVAL

	if ((dev->family == USBCAN) &&
	    (dev->nchannels > MAX_USBCAN_NET_DEVICES))
		return -EINVAL

You can remove LEAF_MAX_NET_DEVICES which is not used, keep
MAX_NET_DEVICES equals to 3 and remove the MAX() macro.
The test specific to the USBCAN family can eventually be moved in the
kvaser_usb_probe() function.

> 
> > >  	err = kvaser_usb_get_endpoints(intf, &dev->bulk_in, &dev->bulk_out);
> > >  	if (err) {
> > >  		dev_err(&intf->dev, "Cannot get usb endpoint(s)");
> > > @@ -1608,7 +1988,7 @@ static int kvaser_usb_probe(struct usb_interface *intf,
> > >  
> > >  	usb_set_intfdata(intf, dev);
> > >  
> > > -	for (i = 0; i < MAX_NET_DEVICES; i++)
> > > +	for (i = 0; i < dev->max_channels; i++)
> > >  		kvaser_usb_send_simple_msg(dev, CMD_RESET_CHIP, i);
> > 
> > Someone reported me that recent leaf firmwares go in trouble when
> > you send a command for a channel that does not exist. Instead of
> > max_channels, you can use nchannels here and move the reset command
> > in the kvaser_usb_init_one() function.
> > I've a patch for this but It is not tested yet. I'll send it next week-end after
> > I did some tests.
> > 
> 
> Great. I guess I can submit a 3-patch series now
> (kfree_skb fix + the above fix + driver).
> 
> > >  
> > >  	err = kvaser_usb_get_software_info(dev);
> > 
> > Thank you,
> > 
> 
> Thanks a lot for your review.
> 
> P.S. the Gmail mailer you've used messed badly with the patch
> code identation; I had to manually restore it back to make the
> discussion meaningful for others :-)
> 
> Regards,
> --
> Darwish

Kr,

-- 
Olivier

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: add BQL support
From: Beniamino Galvani @ 2014-12-28 21:48 UTC (permalink / raw)
  To: Dave Taht
  Cc: Giuseppe Cavallaro, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <CAA93jw7XMK0foH_Hf9PHitV+Xhwxu15xATFv-jnn8_1Tv+kFCA@mail.gmail.com>

On Sun, Dec 28, 2014 at 08:25:40AM -0800, Dave Taht wrote:
> On Sun, Dec 28, 2014 at 6:57 AM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> > Add support for Byte Queue Limits to the STMicro MAC driver.
> 
> Thank you!
> 
> > Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
> > slightly decreases the ping latency from ~10ms to ~3ms when the
> > 100Mbps link is saturated by TCP streams. No difference is
> > observed at 1Gbps.
> 
> I see the plural. With TSQ in place it is hard (without something like
> the rrul test driving multiple streams) to drive a driver to
> saturation with small numbers of flows. This was with pfifo_fast, not
> sch_fq, at 100mbit?

Hi Dave,

yes, this was with pfifo_fast and I used 4 iperf TCP streams. The total
throughput didn't seem to increase adding more streams.

> 
> Can this board actually drive a full gigabit in the first place? Until
> now most of the low end arm boards I have seen only came with
> a 100mbit mac, and the gig ones lacking offloads seemed to peak
> out at about 600mbit.

I measured a throughput of 650mbit in rx and 600mbit in tx.

> 
> Under my christmas tree landed a quad core A5 (odroid-c1), also an
> xgene and zedboard - both of the latter are a-needing BQL,
> and I haven't booted the udroid yet. Hopefully it is the
> same driver you just improved.

I'm using the odroid-c1 too, with this tree based on the recent
Amlogic mainline work:

  https://github.com/bengal/linux/tree/meson8b

Unfortunately at the moment the support for the board is very basic
(for example, SMP is not working yet) but it's enough to do some NIC
tests.

Beniamino

^ permalink raw reply

* Re: [PATCH v2 2/6] GMAC: define clock ID used for GMAC
From: Heiko Stübner @ 2014-12-28 20:51 UTC (permalink / raw)
  To: Roger Chen
  Cc: peppe.cavallaro, netdev, linux-kernel, linux-rockchip, kever.yang,
	eddie.cai
In-Reply-To: <1417056766-3678-1-git-send-email-roger.chen@rock-chips.com>

Hi Roger,

patches modifying the rockchip clock parts should have a subject like
	clk: rockchip: .....

Same for the following patch

Am Donnerstag, 27. November 2014, 10:52:46 schrieb Roger Chen:
> Signed-off-by: Roger Chen <roger.chen@rock-chips.com>
> ---
>  include/dt-bindings/clock/rk3288-cru.h |    4 ++++
>  1 file changed, 4 insertions(+)
> 
> diff --git a/include/dt-bindings/clock/rk3288-cru.h
> b/include/dt-bindings/clock/rk3288-cru.h index 100a08c..f9496f5 100644
> --- a/include/dt-bindings/clock/rk3288-cru.h
> +++ b/include/dt-bindings/clock/rk3288-cru.h
> @@ -72,6 +72,10 @@
>  #define SCLK_HEVC_CABAC		111
>  #define SCLK_HEVC_CORE		112
> 
> +#define SCLK_MAC_PLL		150

Why do you need to export the mac pll source selection?
I understand SCLK_MAC below, as you will want to select ext_gmac explicitly if 
available. But otherwise, why would you need to explicitly select the pll 
source?

The clock framework is intelligent enough that when you do a 
clk_set_rate(sclk_mac, 50000000) for example that it will select the best pll 
source + best divider to provide the most accurate frequency for this, so 
there should be no need to handle the pll source manually.


Heiko

> +#define SCLK_MAC		151
> +#define SCLK_MACREF_OUT		152
> +
>  #define DCLK_VOP0		190
>  #define DCLK_VOP1		191

^ permalink raw reply

* Re: [PATCH v2 1/6] GMAC: add driver for Rockchip RK3288 SoCs integrated GMAC
From: Heiko Stübner @ 2014-12-28 20:28 UTC (permalink / raw)
  To: Roger
  Cc: peppe.cavallaro, netdev, linux-kernel, linux-rockchip, kever.yang,
	eddie.cai
In-Reply-To: <549B5E50.2000501@rock-chips.com>

Am Donnerstag, 25. Dezember 2014, 08:46:08 schrieb Roger:
> Hi! Heiko
> 
> Any suggestion?

Follow the suggestions from Chen-Yu Tsai (and me) :-) .

I.e. use the phy-reset support the stmmac already provides and control power 
stuff using the regulator framework even if it's only a gpio-controlled switch.

See the vcc_host regulator in rk3288-evb.dtsi as an example on how these 
switches should normally be handled.

Heiko


> 
> On 2014/12/3 15:57, Roger wrote:
> > Hi! Heiko
> > 
> > about regulator, power gpio,  reset gpio and irq gpio
> > please refer to my comment inline, tks.
> > 
> > On 2014/12/2 7:44, Heiko Stübner wrote:
> >> Hi Roger,
> >> 
> >> the comments inline are a rough first review. I hope to get a clearer
> >> picture
> >> for the stuff I'm not sure about in v3 once the big issues are fixed.
> >> 
> >> Am Donnerstag, 27. November 2014, 10:52:08 schrieb Roger Chen:
> >>> This driver is based on stmmac driver.
> >>> 
> >>> modification based on Giuseppe CAVALLARO's suggestion:
> >>> 1. use BIT()
> >>> 
> >>>     > +/*RK3288_GRF_SOC_CON3*/
> >>>     > +#define GMAC_TXCLK_DLY_ENABLE   ((0x4000 << 16) | (0x4000))
> >>>     > +#define GMAC_TXCLK_DLY_DISABLE  ((0x4000 << 16) | (0x0000))
> >>>     
> >>>     ...
> >>>     
> >>>     why do not use BIT and BIT_MASK where possible?
> >>>     
> >>>     ===>after modification:
> >>>     
> >>>     #define GRF_BIT(nr)     (BIT(nr) | BIT(nr+16))
> >>>     #define GRF_CLR_BIT(nr) (BIT(nr+16))
> >>>     #define GMAC_TXCLK_DLY_ENABLE   GRF_BIT(14)
> >>>     #define GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(14)
> >>>     ...
> >>> 
> >>> 2.
> >>> 
> >>>     > +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>>     > +             GMAC_PHY_INTF_SEL_RGMII);
> >>>     > +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>>     > +             GMAC_RMII_MODE_CLR);
> >>>     
> >>>     maybe you could perform just one write unless there is some HW
> >>>     constraint.
> >>>     
> >>>     ===>after modification:
> >>>     
> >>>     regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>>     
> >>>              GMAC_PHY_INTF_SEL_RGMII | GMAC_RMII_MODE_CLR);
> >>> 
> >>> 3. use macros
> >>> 
> >>>     > +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO3D_E,
> >>> 
> >>> 0xFFFFFFFF);
> >>> 
> >>>     > +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E,
> >>>     > +             0x3<<2<<16 | 0x3<<2);
> >>>     
> >>>     pls use macros, these shift sequence is really help to decode
> >>>     
> >>>     ===>after modification:
> >>>     
> >>>     regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4A_E, GPIO4A_12MA);
> >>>     regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E, GPIO4B_2_12MA);
> >>> 
> >>> 4. remove grf fail check in rk_gmac_setup()
> >>> 
> >>>     > +    if (IS_ERR(bsp_priv->grf))
> >>>     > +        dev_err(&pdev->dev, "Missing rockchip,grf property\n");
> >>>     
> >>>     I wonder if you can fail on here and save all the check in
> >>>     set_rgmii_speed etc.
> >>>     Maybe this can be considered a mandatory property for the
> >>> 
> >>> glue-logic.
> >>> 
> >>> 5. remove .tx_coe=1
> >>> 
> >>>     > +const struct stmmac_of_data rk_gmac_data = {
> >>>     > +    .has_gmac = 1,
> >>>     > +    .tx_coe = 1,
> >>>     
> >>>     FYI, on new gmac there is the HW capability register to dinamically
> >>>     provide you if coe is supported.
> >>>     
> >>>     IMO you should add the OF "compatible" string and in case of mac
> >>>     newer than the 3.50a you can remove coe.
> >> 
> >> changelogs like these, should be compact and also not be in the
> >> commit message
> >> itself, but in the "comment"-section below the "---" and before the
> >> diffstat.
> >> 
> >>> Signed-off-by: Roger Chen <roger.chen@rock-chips.com>
> >>> ---
> >> 
> >> changelog here ... the commonly used pattern is something like
> >> 
> >> changes since v2:
> >> - ...
> >> - ...
> >> 
> >> changes since v1:
> >> - ...
> >> 
> >>> drivers/net/ethernet/stmicro/stmmac/Makefile       |    2 +-
> >>> 
> >>>   drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c     |  636
> >>> 
> >>> ++++++++++++++++++++
> >>> .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |
> >>> 
> >>>   3 +
> >>>   .../net/ethernet/stmicro/stmmac/stmmac_platform.h  |    1 +
> >>>   4 files changed, 641 insertions(+), 1 deletion(-)
> >>>   create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
> >>> 
> >>> diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile
> >>> b/drivers/net/ethernet/stmicro/stmmac/Makefile index ac4d562..73c2715
> >>> 100644
> >>> --- a/drivers/net/ethernet/stmicro/stmmac/Makefile
> >>> +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
> >>> @@ -6,7 +6,7 @@ stmmac-objs:= stmmac_main.o stmmac_ethtool.o
> >>> stmmac_mdio.o
> >>> ring_mode.o    \
> >>> 
> >>>   obj-$(CONFIG_STMMAC_PLATFORM) += stmmac-platform.o
> >>>   stmmac-platform-objs:= stmmac_platform.o dwmac-meson.o
> >>> 
> >>> dwmac-sunxi.o    \
> >>> -               dwmac-sti.o dwmac-socfpga.o
> >>> +               dwmac-sti.o dwmac-socfpga.o dwmac-rk.o
> >>> 
> >>>   obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
> >>>   stmmac-pci-objs:= stmmac_pci.o
> >>> 
> >>> diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
> >>> b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c new file mode 100644
> >>> index 0000000..870563f
> >>> --- /dev/null
> >>> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
> >>> @@ -0,0 +1,636 @@
> >>> +/**
> >>> + * dwmac-rk.c - Rockchip RK3288 DWMAC specific glue layer
> >>> + *
> >>> + * Copyright (C) 2014 Chen-Zhi (Roger Chen)
> >>> + *
> >>> + * Chen-Zhi (Roger Chen)  <roger.chen@rock-chips.com>
> >>> + *
> >>> + * This program is free software; you can redistribute it and/or
> >>> modify
> >>> + * it under the terms of the GNU General Public License as
> >>> published by
> >>> + * the Free Software Foundation; either version 2 of the License, or
> >>> + * (at your option) any later version.
> >>> + *
> >>> + * This program is distributed in the hope that it will be useful,
> >>> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> >>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> >>> + * GNU General Public License for more details.
> >>> + */
> >>> +
> >>> +#include <linux/stmmac.h>
> >>> +#include <linux/bitops.h>
> >>> +#include <linux/clk.h>
> >>> +#include <linux/phy.h>
> >>> +#include <linux/of_net.h>
> >>> +#include <linux/gpio.h>
> >>> +#include <linux/of_gpio.h>
> >>> +#include <linux/of_device.h>
> >>> +#include <linux/regulator/consumer.h>
> >>> +#include <linux/delay.h>
> >>> +#include <linux/mfd/syscon.h>
> >>> +#include <linux/regmap.h>
> >>> +
> >>> +struct rk_priv_data {
> >>> +    struct platform_device *pdev;
> >>> +    int phy_iface;
> >>> +    bool power_ctrl_by_pmu;
> >>> +    char pmu_regulator[32];
> >>> +    int pmu_enable_level;
> >>> +
> >>> +    int power_io;
> >>> +    int power_io_level;
> >>> +    int reset_io;
> >>> +    int reset_io_level;
> >>> +    int phyirq_io;
> >>> +    int phyirq_io_level;
> >>> +
> >>> +    bool clk_enabled;
> >>> +    bool clock_input;
> >>> +
> >>> +    struct clk *clk_mac;
> >>> +    struct clk *clk_mac_pll;
> >>> +    struct clk *gmac_clkin;
> >>> +    struct clk *mac_clk_rx;
> >>> +    struct clk *mac_clk_tx;
> >>> +    struct clk *clk_mac_ref;
> >>> +    struct clk *clk_mac_refout;
> >>> +    struct clk *aclk_mac;
> >>> +    struct clk *pclk_mac;
> >>> +
> >>> +    int tx_delay;
> >>> +    int rx_delay;
> >>> +
> >>> +    struct regmap *grf;
> >>> +};
> >>> +
> >>> +#define RK3288_GRF_SOC_CON1 0x0248
> >>> +#define RK3288_GRF_SOC_CON3 0x0250
> >>> +#define RK3288_GRF_GPIO3D_E 0x01ec
> >>> +#define RK3288_GRF_GPIO4A_E 0x01f0
> >>> +#define RK3288_GRF_GPIO4B_E 0x01f4
> >> 
> >> here you're using a space instead of a tab, please select one pattern
> >> either
> >> tabs or space but do not mix them.
> >> 
> >>> +#define GPIO3D_2MA    0xFFFF0000
> >>> +#define GPIO3D_4MA    0xFFFF5555
> >>> +#define GPIO3D_8MA    0xFFFFAAAA
> >>> +#define GPIO3D_12MA    0xFFFFFFFF
> >>> +
> >>> +#define GPIO4A_2MA    0xFFFF0000
> >>> +#define GPIO4A_4MA    0xFFFF5555
> >>> +#define GPIO4A_8MA    0xFFFFAAAA
> >>> +#define GPIO4A_12MA    0xFFFFFFFF
> >> 
> >> see comment about pin settings below
> >> 
> >>> +
> >>> +#define GRF_BIT(nr)    (BIT(nr) | BIT(nr+16))
> >>> +#define GRF_CLR_BIT(nr)    (BIT(nr+16))
> >>> +
> >>> +#define GPIO4B_2_2MA    (GRF_CLR_BIT(2) | GRF_CLR_BIT(3))
> >>> +#define GPIO4B_2_4MA    (GRF_BIT(2) | GRF_CLR_BIT(3))
> >>> +#define GPIO4B_2_8MA    (GRF_CLR_BIT(2) | GRF_BIT(3))
> >>> +#define GPIO4B_2_12MA    (GRF_BIT(2) | GRF_BIT(3))
> >>> +
> >>> +/*RK3288_GRF_SOC_CON1*/
> >>> +#define GMAC_PHY_INTF_SEL_RGMII    (GRF_BIT(6) | GRF_CLR_BIT(7) |
> >>> GRF_CLR_BIT(8))
> >>> +#define GMAC_PHY_INTF_SEL_RMII  (GRF_CLR_BIT(6) |
> >>> GRF_CLR_BIT(7) | GRF_BIT(8))
> >>> +#define GMAC_FLOW_CTRL          GRF_BIT(9)
> >>> +#define GMAC_FLOW_CTRL_CLR      GRF_CLR_BIT(9)
> >>> +#define GMAC_SPEED_10M          GRF_CLR_BIT(10)
> >>> +#define GMAC_SPEED_100M         GRF_BIT(10)
> >>> +#define GMAC_RMII_CLK_25M       GRF_BIT(11)
> >>> +#define GMAC_RMII_CLK_2_5M      GRF_CLR_BIT(11)
> >>> +#define GMAC_CLK_125M           (GRF_CLR_BIT(12) | GRF_CLR_BIT(13))
> >>> +#define GMAC_CLK_25M            (GRF_BIT(12) | GRF_BIT(13))
> >>> +#define GMAC_CLK_2_5M           (GRF_CLR_BIT(12) | GRF_BIT(13))
> >>> +#define GMAC_RMII_MODE          GRF_BIT(14)
> >>> +#define GMAC_RMII_MODE_CLR      GRF_CLR_BIT(14)
> >>> +
> >>> +/*RK3288_GRF_SOC_CON3*/
> >>> +#define GMAC_TXCLK_DLY_ENABLE   GRF_BIT(14)
> >>> +#define GMAC_TXCLK_DLY_DISABLE  GRF_CLR_BIT(14)
> >>> +#define GMAC_RXCLK_DLY_ENABLE   GRF_BIT(15)
> >>> +#define GMAC_RXCLK_DLY_DISABLE  GRF_CLR_BIT(15)
> >>> +#define GMAC_CLK_RX_DL_CFG(val) ((0x7F<<7<<16) | (val<<7))
> >>> +#define GMAC_CLK_TX_DL_CFG(val) ((0x7F<<16) | (val))
> >> 
> >> again mixed tabs and spaces as delimiters.
> >> 
> >> Also the _CFG macros are not well abstracted. You could take a look
> >> at the
> >> HIWORD_UPDATE macro in drivers/clk/rockchip/clk.h:
> >> 
> >> #define GMAC_CLK_DL_MASK    0x7f
> >> #define GMAC_CLK_RX_DL_CFG(val)  HIWORD_UPDATE(val, GMAC_CLK_DL_MASK, 7)
> >> #define GMAC_CLK_TX_DL_CFG(val)  HIWORD_UPDATE(val, GMAC_CLK_DL_MASK, 0)
> >> 
> >>> +
> >>> +static void set_to_rgmii(struct rk_priv_data *bsp_priv,
> >>> +             int tx_delay, int rx_delay)
> >>> +{
> >>> +    if (IS_ERR(bsp_priv->grf)) {
> >>> +        pr_err("%s: Missing rockchip,grf property\n", __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +             GMAC_PHY_INTF_SEL_RGMII | GMAC_RMII_MODE_CLR);
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON3,
> >>> +             GMAC_RXCLK_DLY_ENABLE | GMAC_TXCLK_DLY_ENABLE |
> >>> +             GMAC_CLK_RX_DL_CFG(rx_delay) |
> >>> +             GMAC_CLK_TX_DL_CFG(tx_delay));
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO3D_E, GPIO3D_12MA);
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4A_E, GPIO4A_12MA);
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_GPIO4B_E, GPIO4B_2_12MA);
> >> 
> >> please don't write to parts controlled by other drivers - here the drive
> >> strength settings of pins is controlled by the pinctrl driver.
> >> Instead you can
> >> just set the drive-strength in the pinctrl settings.
> >> 
> >>> +
> >>> +    pr_debug("%s: tx delay=0x%x; rx delay=0x%x;\n",
> >>> +         __func__, tx_delay, rx_delay);
> >>> +}
> >>> +
> >>> +static void set_to_rmii(struct rk_priv_data *bsp_priv)
> >>> +{
> >>> +    if (IS_ERR(bsp_priv->grf)) {
> >>> +        pr_err("%s: Missing rockchip,grf property\n", __func__);
> >> 
> >> you have a device-reference in rk_priv_data, so you could use dev_err
> >> here.
> >> Same for all other pr_err/pr_debug/pr_* calls in this file.
> >> 
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +             GMAC_PHY_INTF_SEL_RMII);
> >>> +    regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +             GMAC_RMII_MODE);
> >> 
> >> these two could be combined?
> >> 
> >>> +}
> >>> +
> >>> +static void set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed)
> >>> +{
> >>> +    if (IS_ERR(bsp_priv->grf)) {
> >>> +        pr_err("%s: Missing rockchip,grf property\n", __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (speed == 10)
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> GMAC_CLK_2_5M);
> >>> +    else if (speed == 100)
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> GMAC_CLK_25M);
> >>> +    else if (speed == 1000)
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> GMAC_CLK_125M);
> >>> +    else
> >>> +        pr_err("unknown speed value for RGMII! speed=%d", speed);
> >>> +}
> >>> +
> >>> +static void set_rmii_speed(struct rk_priv_data *bsp_priv, int speed)
> >>> +{
> >>> +    if (IS_ERR(bsp_priv->grf)) {
> >>> +        pr_err("%s: Missing rockchip,grf property\n", __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    if (speed == 10) {
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +                 GMAC_RMII_CLK_2_5M);
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +                 GMAC_SPEED_10M);
> >> 
> >> combine into one write?
> >> 
> >>> +    } else if (speed == 100) {
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +                 GMAC_RMII_CLK_25M);
> >>> +        regmap_write(bsp_priv->grf, RK3288_GRF_SOC_CON1,
> >>> +                 GMAC_SPEED_100M);
> >> 
> >> combine into one write?
> >> 
> >>> +    } else {
> >>> +        pr_err("unknown speed value for RMII! speed=%d", speed);
> >>> +    }
> >>> +}
> >>> +
> >>> +#define MAC_CLK_RX    "mac_clk_rx"
> >>> +#define MAC_CLK_TX    "mac_clk_tx"
> >>> +#define CLK_MAC_REF    "clk_mac_ref"
> >>> +#define CLK_MAC_REF_OUT    "clk_mac_refout"
> >>> +#define CLK_MAC_PLL    "clk_mac_pll"
> >>> +#define ACLK_MAC    "aclk_mac"
> >>> +#define PCLK_MAC    "pclk_mac"
> >>> +#define MAC_CLKIN    "ext_gmac"
> >>> +#define CLK_MAC        "stmmaceth"
> >> 
> >> why the need to extra constants for the clock names and not use the
> >> real names
> >> directly like most other drivers do?
> >> 
> >>> +
> >>> +static int gmac_clk_init(struct rk_priv_data *bsp_priv)
> >>> +{
> >>> +    struct device *dev = &bsp_priv->pdev->dev;
> >>> +
> >>> +    bsp_priv->clk_enabled = false;
> >>> +
> >>> +    bsp_priv->mac_clk_rx = clk_get(dev, MAC_CLK_RX);
> >>> +    if (IS_ERR(bsp_priv->mac_clk_rx))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, MAC_CLK_RX);
> >>> +
> >>> +    bsp_priv->mac_clk_tx = clk_get(dev, MAC_CLK_TX);
> >>> +    if (IS_ERR(bsp_priv->mac_clk_tx))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, MAC_CLK_TX);
> >>> +
> >>> +    bsp_priv->clk_mac_ref = clk_get(dev, CLK_MAC_REF);
> >>> +    if (IS_ERR(bsp_priv->clk_mac_ref))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, CLK_MAC_REF);
> >>> +
> >>> +    bsp_priv->clk_mac_refout = clk_get(dev, CLK_MAC_REF_OUT);
> >>> +    if (IS_ERR(bsp_priv->clk_mac_refout))
> >>> +        pr_warn("%s: warning:cannot get clock %s\n",
> >>> +            __func__, CLK_MAC_REF_OUT);
> >>> +
> >>> +    bsp_priv->aclk_mac = clk_get(dev, ACLK_MAC);
> >>> +    if (IS_ERR(bsp_priv->aclk_mac))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, ACLK_MAC);
> >>> +
> >>> +    bsp_priv->pclk_mac = clk_get(dev, PCLK_MAC);
> >>> +    if (IS_ERR(bsp_priv->pclk_mac))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, PCLK_MAC);
> >>> +
> >>> +    bsp_priv->clk_mac_pll = clk_get(dev, CLK_MAC_PLL);
> >>> +    if (IS_ERR(bsp_priv->clk_mac_pll))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, CLK_MAC_PLL);
> >>> +
> >>> +    bsp_priv->gmac_clkin = clk_get(dev, MAC_CLKIN);
> >>> +    if (IS_ERR(bsp_priv->gmac_clkin))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, MAC_CLKIN);
> >>> +
> >>> +    bsp_priv->clk_mac = clk_get(dev, CLK_MAC);
> >>> +    if (IS_ERR(bsp_priv->clk_mac))
> >>> +        pr_warn("%s: warning: cannot get clock %s\n",
> >>> +            __func__, CLK_MAC);
> >> 
> >> there is not clk_put in the _remove case ... maybe you could simply use
> >> devm_clk_get here so that all clocks are put on device removal.
> >> 
> >> Also you're warning on every missing clock. Below it looks like you
> >> need a
> >> different set of them for rgmii and rmii, so maybe you should simply
> >> error out
> >> when core clocks for the selected phy-mode are missing.
> >> 
> >>> +
> >>> +    if (bsp_priv->clock_input) {
> >>> +        pr_info("%s: clock input from PHY\n", __func__);
> >>> +    } else {
> >>> +        if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
> >>> +            clk_set_rate(bsp_priv->clk_mac_pll, 50000000);
> >>> +
> >>> +        clk_set_parent(bsp_priv->clk_mac, bsp_priv->clk_mac_pll);
> >> 
> >> why the explicit reparenting. The common clock-framework is
> >> intelligent enough
> >> to select the best suitable parent.
> >> 
> >> In general I'm thinking the clock-handling inside this driver should be
> >> simplyfied, as the common-clock framework can handle most cases
> >> itself. I.e. if
> >> a 125MHz external clock is present and so on. But haven't looked to
> >> deep yet.
> >> 
> >>> +    }
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int gmac_clk_enable(struct rk_priv_data *bsp_priv, bool enable)
> >>> +{
> >>> +    int phy_iface = phy_iface = bsp_priv->phy_iface;
> >>> +
> >>> +    if (enable) {
> >>> +        if (!bsp_priv->clk_enabled) {
> >>> +            if (phy_iface == PHY_INTERFACE_MODE_RMII) {
> >>> +                if (!IS_ERR(bsp_priv->mac_clk_rx))
> >>> +                    clk_prepare_enable(
> >>> +                        bsp_priv->mac_clk_rx);
> >>> +
> >>> +                if (!IS_ERR(bsp_priv->clk_mac_ref))
> >>> +                    clk_prepare_enable(
> >>> +                        bsp_priv->clk_mac_ref);
> >>> +
> >>> +                if (!IS_ERR(bsp_priv->clk_mac_refout))
> >>> +                    clk_prepare_enable(
> >>> +                        bsp_priv->clk_mac_refout);
> >>> +            }
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->aclk_mac))
> >>> +                clk_prepare_enable(bsp_priv->aclk_mac);
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->pclk_mac))
> >>> +                clk_prepare_enable(bsp_priv->pclk_mac);
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->mac_clk_tx))
> >>> +                clk_prepare_enable(bsp_priv->mac_clk_tx);
> >>> +
> >>> +            /**
> >>> +             * if (!IS_ERR(bsp_priv->clk_mac))
> >>> +             *    clk_prepare_enable(bsp_priv->clk_mac);
> >>> +             */
> >>> +            mdelay(5);
> >>> +            bsp_priv->clk_enabled = true;
> >>> +        }
> >>> +    } else {
> >>> +        if (bsp_priv->clk_enabled) {
> >>> +            if (phy_iface == PHY_INTERFACE_MODE_RMII) {
> >>> +                if (!IS_ERR(bsp_priv->mac_clk_rx))
> >>> +                    clk_disable_unprepare(
> >>> +                        bsp_priv->mac_clk_rx);
> >>> +
> >>> +                if (!IS_ERR(bsp_priv->clk_mac_ref))
> >>> +                    clk_disable_unprepare(
> >>> +                        bsp_priv->clk_mac_ref);
> >>> +
> >>> +                if (!IS_ERR(bsp_priv->clk_mac_refout))
> >>> +                    clk_disable_unprepare(
> >>> +                        bsp_priv->clk_mac_refout);
> >>> +            }
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->aclk_mac))
> >>> +                clk_disable_unprepare(bsp_priv->aclk_mac);
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->pclk_mac))
> >>> +                clk_disable_unprepare(bsp_priv->pclk_mac);
> >>> +
> >>> +            if (!IS_ERR(bsp_priv->mac_clk_tx))
> >>> + clk_disable_unprepare(bsp_priv->mac_clk_tx);
> >>> +            /**
> >>> +             * if (!IS_ERR(bsp_priv->clk_mac))
> >>> +             * clk_disable_unprepare(bsp_priv->clk_mac);
> >>> +             */
> >>> +            bsp_priv->clk_enabled = false;
> >>> +        }
> >>> +    }
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int power_on_by_pmu(struct rk_priv_data *bsp_priv, bool enable)
> >>> +{
> >>> +    struct regulator *ldo;
> >>> +    char *ldostr = bsp_priv->pmu_regulator;
> >>> +    int ret;
> >>> +
> >>> +    if (!ldostr) {
> >>> +        pr_err("%s: no ldo found\n", __func__);
> >>> +        return -1;
> >>> +    }
> >>> +
> >>> +    ldo = regulator_get(NULL, ldostr);
> >>> +    if (!ldo) {
> >>> +        pr_err("\n%s get ldo %s failed\n", __func__, ldostr);
> >>> +    } else {
> >>> +        if (enable) {
> >>> +            if (!regulator_is_enabled(ldo)) {
> >>> +                regulator_set_voltage(ldo, 3300000, 3300000);
> >>> +                ret = regulator_enable(ldo);
> >>> +                if (ret != 0)
> >>> +                    pr_err("%s: fail to enable %s\n",
> >>> +                           __func__, ldostr);
> >>> +                else
> >>> +                    pr_info("turn on ldo done.\n");
> >>> +            } else {
> >>> +                pr_warn("%s is enabled before enable", ldostr);
> >>> +            }
> >>> +        } else {
> >>> +            if (regulator_is_enabled(ldo)) {
> >>> +                ret = regulator_disable(ldo);
> >>> +                if (ret != 0)
> >>> +                    pr_err("%s: fail to disable %s\n",
> >>> +                           __func__, ldostr);
> >>> +                else
> >>> +                    pr_info("turn off ldo done.\n");
> >>> +            } else {
> >>> +                pr_warn("%s is disabled before disable",
> >>> +                    ldostr);
> >>> +            }
> >>> +        }
> >>> +        regulator_put(ldo);
> >>> +    }
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int power_on_by_gpio(struct rk_priv_data *bsp_priv, bool
> >>> enable)
> >>> +{
> >>> +    if (enable) {
> >>> +        /*power on*/
> >>> +        if (gpio_is_valid(bsp_priv->power_io))
> >>> +            gpio_direction_output(bsp_priv->power_io,
> >>> +                          bsp_priv->power_io_level);
> >>> +    } else {
> >>> +        /*power off*/
> >>> +        if (gpio_is_valid(bsp_priv->power_io))
> >>> +            gpio_direction_output(bsp_priv->power_io,
> >>> +                          !bsp_priv->power_io_level);
> >>> +    }
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static int phy_power_on(struct rk_priv_data *bsp_priv, bool enable)
> >>> +{
> >>> +    int ret = -1;
> >>> +
> >>> +    pr_info("Ethernet PHY power %s\n", enable == 1 ? "on" : "off");
> >>> +
> >>> +    if (bsp_priv->power_ctrl_by_pmu)
> >>> +        ret = power_on_by_pmu(bsp_priv, enable);
> >>> +    else
> >>> +        ret =  power_on_by_gpio(bsp_priv, enable);
> >> 
> >> this looks wrong. This should always be a regulator. Even a regulator
> >> + switch
> >> controlled by a gpio can still be modelled as regulator, so that you
> >> don't
> >> need this switch and assorted special handling - so just use the
> >> regulator API
> > 
> > In some case, it would be a switching circuit to control the power for
> > PHY.
> > All I need to do is to control a GPIO to make switch on/off. So...
> > 
> >>> +
> >>> +    if (enable) {
> >>> +        /*reset*/
> >>> +        if (gpio_is_valid(bsp_priv->reset_io)) {
> >>> +            gpio_direction_output(bsp_priv->reset_io,
> >>> +                          bsp_priv->reset_io_level);
> >>> +            mdelay(5);
> >>> +            gpio_direction_output(bsp_priv->reset_io,
> >>> +                          !bsp_priv->reset_io_level);
> >>> +        }
> >>> +        mdelay(30);
> >>> +
> >>> +    } else {
> >>> +        /*pull down reset*/
> >>> +        if (gpio_is_valid(bsp_priv->reset_io)) {
> >>> +            gpio_direction_output(bsp_priv->reset_io,
> >>> +                          bsp_priv->reset_io_level);
> >>> +        }
> >>> +    }
> >> 
> >> I'm not sure yet if it would be better to use the reset framework for
> >> this.
> >> While it says it is also meant for reset-gpios, there does not seem a
> >> driver
> >> for this to exist yet.
> > 
> > What should I do?
> > 
> >>> +
> >>> +    return ret;
> >>> +}
> >>> +
> >>> +#define GPIO_PHY_POWER    "gmac_phy_power"
> >>> +#define GPIO_PHY_RESET    "gmac_phy_reset"
> >>> +#define GPIO_PHY_IRQ    "gmac_phy_irq"
> >> 
> >> again I don't understand why these constants are necessary
> >> 
> >>> +
> >>> +static void *rk_gmac_setup(struct platform_device *pdev)
> >>> +{
> >>> +    struct rk_priv_data *bsp_priv;
> >>> +    struct device *dev = &pdev->dev;
> >>> +    enum of_gpio_flags flags;
> >>> +    int ret;
> >>> +    const char *strings = NULL;
> >>> +    int value;
> >>> +    int irq;
> >>> +
> >>> +    bsp_priv = devm_kzalloc(dev, sizeof(*bsp_priv), GFP_KERNEL);
> >>> +    if (!bsp_priv)
> >>> +        return ERR_PTR(-ENOMEM);
> >>> +
> >>> +    bsp_priv->phy_iface = of_get_phy_mode(dev->of_node);
> >>> +
> >>> +    ret = of_property_read_string(dev->of_node, "pmu_regulator",
> >>> &strings);
> >>> +    if (ret) {
> >>> +        pr_err("%s: Can not read property: pmu_regulator.\n",
> >>> __func__);
> >>> +        bsp_priv->power_ctrl_by_pmu = false;
> >>> +    } else {
> >>> +        pr_info("%s: ethernet phy power controlled by pmu(%s).\n",
> >>> +            __func__, strings);
> >>> +        bsp_priv->power_ctrl_by_pmu = true;
> >>> +        strcpy(bsp_priv->pmu_regulator, strings);
> >>> +    }
> >> 
> >> There is a generic regulator-dt-binding for regulator-consumers
> >> available
> >> which you should of course use.
> > 
> > The same explanation as above
> > 
> >>> +
> >>> +    ret = of_property_read_u32(dev->of_node, "pmu_enable_level",
> >>> &value);
> >>> +    if (ret) {
> >>> +        pr_err("%s: Can not read property: pmu_enable_level.\n",
> >>> +               __func__);
> >>> +        bsp_priv->power_ctrl_by_pmu = false;
> >>> +    } else {
> >>> +        pr_info("%s: PHY power controlled by pmu(level = %s).\n",
> >>> +            __func__, (value == 1) ? "HIGH" : "LOW");
> >>> +        bsp_priv->power_ctrl_by_pmu = true;
> >>> +        bsp_priv->pmu_enable_level = value;
> >>> +    }
> >> 
> >> What is this used for? Enabling should of course be done via
> >> regulator_enable
> >> and disabling using regulator_disable.
> >> 
> >>> +
> >>> +    ret = of_property_read_string(dev->of_node, "clock_in_out",
> >>> &strings);
> >>> +    if (ret) {
> >>> +        pr_err("%s: Can not read property: clock_in_out.\n",
> >>> __func__);
> >>> +        bsp_priv->clock_input = true;
> >>> +    } else {
> >>> +        pr_info("%s: clock input or output? (%s).\n",
> >>> +            __func__, strings);
> >>> +        if (!strcmp(strings, "input"))
> >>> +            bsp_priv->clock_input = true;
> >>> +        else
> >>> +            bsp_priv->clock_input = false;
> >>> +    }
> >>> +
> >>> +    ret = of_property_read_u32(dev->of_node, "tx_delay", &value);
> >>> +    if (ret) {
> >>> +        bsp_priv->tx_delay = 0x30;
> >>> +        pr_err("%s: Can not read property: tx_delay.", __func__);
> >>> +        pr_err("%s: set tx_delay to 0x%x\n",
> >>> +               __func__, bsp_priv->tx_delay);
> >>> +    } else {
> >>> +        pr_info("%s: TX delay(0x%x).\n", __func__, value);
> >>> +        bsp_priv->tx_delay = value;
> >>> +    }
> >>> +
> >>> +    ret = of_property_read_u32(dev->of_node, "rx_delay", &value);
> >>> +    if (ret) {
> >>> +        bsp_priv->rx_delay = 0x10;
> >>> +        pr_err("%s: Can not read property: rx_delay.", __func__);
> >>> +        pr_err("%s: set rx_delay to 0x%x\n",
> >>> +               __func__, bsp_priv->rx_delay);
> >>> +    } else {
> >>> +        pr_info("%s: RX delay(0x%x).\n", __func__, value);
> >>> +        bsp_priv->rx_delay = value;
> >>> +    }
> >>> +
> >>> +    bsp_priv->grf = syscon_regmap_lookup_by_phandle(dev->of_node,
> >>> +                            "rockchip,grf");
> >>> +    bsp_priv->phyirq_io =
> >>> +        of_get_named_gpio_flags(dev->of_node,
> >>> +                    "phyirq-gpio", 0, &flags);
> >>> +    bsp_priv->phyirq_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
> >>> +
> >>> +    bsp_priv->reset_io =
> >>> +        of_get_named_gpio_flags(dev->of_node,
> >>> +                    "reset-gpio", 0, &flags);
> >>> +    bsp_priv->reset_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
> >>> +
> >>> +    bsp_priv->power_io =
> >>> +        of_get_named_gpio_flags(dev->of_node, "power-gpio", 0,
> >>> &flags);
> >>> +    bsp_priv->power_io_level = (flags & OF_GPIO_ACTIVE_LOW) ? 0 : 1;
> >>> +
> >>> +    /*power*/
> >>> +    if (!gpio_is_valid(bsp_priv->power_io)) {
> >>> +        pr_err("%s: Failed to get GPIO %s.\n",
> >>> +               __func__, "power-gpio");
> >>> +    } else {
> >>> +        ret = gpio_request(bsp_priv->power_io, GPIO_PHY_POWER);
> >>> +        if (ret)
> >>> +            pr_err("%s: ERROR: Failed to request GPIO %s.\n",
> >>> +                   __func__, GPIO_PHY_POWER);
> >>> +    }
> >> 
> >> When everything power-related is handled using the regulator api, you
> >> don't
> >> need this
> > 
> > The same explanation as above
> > 
> >>> +
> >>> +    if (!gpio_is_valid(bsp_priv->reset_io)) {
> >>> +        pr_err("%s: ERROR: Get reset-gpio failed.\n", __func__);
> >>> +    } else {
> >>> +        ret = gpio_request(bsp_priv->reset_io, GPIO_PHY_RESET);
> >>> +        if (ret)
> >>> +            pr_err("%s: ERROR: Failed to request GPIO %s.\n",
> >>> +                   __func__, GPIO_PHY_RESET);
> >>> +    }
> >>> +
> >>> +    if (bsp_priv->phyirq_io > 0) {
> >> 
> >> This is more for my understanding: why does the mac driver need to
> >> handle the
> >> phy interrupt - but I might be overlooking something.
> > 
> > phy interrupt is not mandatory.  In most of the time,  in order to
> > find something happen in PHY, for example,
> > link is up or down,  we just use polling method to read the phy's
> > register in a timer.
> > Buf if phy interrupt is in use, when link is up or down,  phy
> > interrupt pin will be assert to inform the CPU.
> > I just implement the driver for phy interrupt gpio,  not enable it as
> > default.
> > 
> >>> +        struct plat_stmmacenet_data *plat_dat;
> >>> +
> >>> +        pr_info("PHY irq in use\n");
> >>> +        ret = gpio_request(bsp_priv->phyirq_io, GPIO_PHY_IRQ);
> >>> +        if (ret < 0) {
> >>> +            pr_warn("%s: Failed to request GPIO %s\n",
> >>> +                __func__, GPIO_PHY_IRQ);
> >>> +            goto goon;
> >>> +        }
> >>> +
> >>> +        ret = gpio_direction_input(bsp_priv->phyirq_io);
> >>> +        if (ret < 0) {
> >>> +            pr_err("%s, Failed to set input for GPIO %s\n",
> >>> +                   __func__, GPIO_PHY_IRQ);
> >>> +            gpio_free(bsp_priv->phyirq_io);
> >>> +            goto goon;
> >>> +        }
> >>> +
> >>> +        irq = gpio_to_irq(bsp_priv->phyirq_io);
> >>> +        if (irq < 0) {
> >>> +            ret = irq;
> >>> +            pr_err("Failed to set irq for %s\n",
> >>> +                   GPIO_PHY_IRQ);
> >>> +            gpio_free(bsp_priv->phyirq_io);
> >>> +            goto goon;
> >>> +        }
> >>> +
> >>> +        plat_dat = dev_get_platdata(&pdev->dev);
> >>> +        if (plat_dat)
> >>> +            plat_dat->mdio_bus_data->probed_phy_irq = irq;
> >>> +        else
> >>> +            pr_err("%s: plat_data is NULL\n", __func__);
> >>> +    }
> >>> +
> >>> +goon:
> >>> +    /*rmii or rgmii*/
> >>> +    if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) {
> >>> +        pr_info("%s: init for RGMII\n", __func__);
> >>> +        set_to_rgmii(bsp_priv, bsp_priv->tx_delay,
> >>> bsp_priv->rx_delay);
> >>> +    } else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII) {
> >>> +        pr_info("%s: init for RMII\n", __func__);
> >>> +        set_to_rmii(bsp_priv);
> >>> +    } else {
> >>> +        pr_err("%s: ERROR: NO interface defined!\n", __func__);
> >>> +    }
> >>> +
> >>> +    bsp_priv->pdev = pdev;
> >>> +
> >>> +    gmac_clk_init(bsp_priv);
> >>> +
> >>> +    return bsp_priv;
> >>> +}
> >>> +
> >>> +static int rk_gmac_init(struct platform_device *pdev, void *priv)
> >>> +{
> >>> +    struct rk_priv_data *bsp_priv = priv;
> >>> +    int ret;
> >>> +
> >>> +    ret = phy_power_on(bsp_priv, true);
> >>> +    if (ret)
> >>> +        return ret;
> >>> +
> >>> +    ret = gmac_clk_enable(bsp_priv, true);
> >>> +    if (ret)
> >>> +        return ret;
> >>> +
> >>> +    return 0;
> >>> +}
> >>> +
> >>> +static void rk_gmac_exit(struct platform_device *pdev, void *priv)
> >>> +{
> >>> +    struct rk_priv_data *gmac = priv;
> >>> +
> >>> +    phy_power_on(gmac, false);
> >>> +    gmac_clk_enable(gmac, false);
> >>> +}
> >>> +
> >>> +static void rk_fix_speed(void *priv, unsigned int speed)
> >>> +{
> >>> +    struct rk_priv_data *bsp_priv = priv;
> >>> +
> >>> +    if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII)
> >>> +        set_rgmii_speed(bsp_priv, speed);
> >>> +    else if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RMII)
> >>> +        set_rmii_speed(bsp_priv, speed);
> >>> +    else
> >>> +        pr_err("unsupported interface %d", bsp_priv->phy_iface);
> >>> +}
> >>> +
> >>> +const struct stmmac_of_data rk_gmac_data = {
> >>> +    .has_gmac = 1,
> >>> +    .fix_mac_speed = rk_fix_speed,
> >>> +    .setup = rk_gmac_setup,
> >>> +    .init = rk_gmac_init,
> >>> +    .exit = rk_gmac_exit,
> >>> +};
> >>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> >>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index
> >>> 15814b7..b4dee96 100644
> >>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> >>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
> >>> @@ -33,6 +33,7 @@
> >>> 
> >>>   static const struct of_device_id stmmac_dt_ids[] = {
> >>>   
> >>>       /* SoC specific glue layers should come before generic
> >>> 
> >>> bindings */
> >>> +    { .compatible = "rockchip,rk3288-gmac", .data = &rk_gmac_data},
> >> 
> >> please name that rk3288_gmac_data [of course the other occurences too]
> >> It makes it easier to see which soc it is meant for and it's also not
> >> save to
> >> assume that the next one will use the same register + bit positions
> >> in the
> >> grf.
> >> 
> >>>       { .compatible = "amlogic,meson6-dwmac", .data =
> >>> 
> >>> &meson6_dwmac_data},
> >>> 
> >>>       { .compatible = "allwinner,sun7i-a20-gmac", .data =
> >>> 
> >>> &sun7i_gmac_data},
> >>> 
> >>>       { .compatible = "st,stih415-dwmac", .data = &stih4xx_dwmac_data},
> >>> 
> >>> @@ -291,6 +292,8 @@ static int stmmac_pltfr_probe(struct
> >>> platform_device
> >>> *pdev) return  -ENOMEM;
> >>> 
> >>>           }
> >>> 
> >>> +        pdev->dev.platform_data = plat_dat;
> >>> +
> >>> 
> >>>           ret = stmmac_probe_config_dt(pdev, plat_dat, &mac);
> >>>           if (ret) {
> >>>           
> >>>               pr_err("%s: main dt probe failed", __func__);
> >>> 
> >>> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
> >>> b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h index
> >>> 25dd1f7..32a0516 100644
> >>> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
> >>> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
> >>> @@ -24,5 +24,6 @@ extern const struct stmmac_of_data sun7i_gmac_data;
> >>> 
> >>>   extern const struct stmmac_of_data stih4xx_dwmac_data;
> >>>   extern const struct stmmac_of_data stid127_dwmac_data;
> >>>   extern const struct stmmac_of_data socfpga_gmac_data;
> >>> 
> >>> +extern const struct stmmac_of_data rk_gmac_data;
> >>> 
> >>>   #endif /* __STMMAC_PLATFORM_H__ */

^ permalink raw reply

* *****SPAM***** READ.
From: SK @ 2014-12-28  9:28 UTC (permalink / raw)
  To: Recipients

This is to request your assistance to execute a lucrative project in Asia. Kindly respond.

^ permalink raw reply

* Re: [PATCH net-next] net: stmmac: add BQL support
From: Dave Taht @ 2014-12-28 16:25 UTC (permalink / raw)
  To: Beniamino Galvani
  Cc: Giuseppe Cavallaro, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org
In-Reply-To: <1419778631-23067-1-git-send-email-b.galvani@gmail.com>

On Sun, Dec 28, 2014 at 6:57 AM, Beniamino Galvani <b.galvani@gmail.com> wrote:
> Add support for Byte Queue Limits to the STMicro MAC driver.

Thank you!

> Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
> slightly decreases the ping latency from ~10ms to ~3ms when the
> 100Mbps link is saturated by TCP streams. No difference is
> observed at 1Gbps.

I see the plural. With TSQ in place it is hard (without something like
the rrul test driving multiple streams) to drive a driver to
saturation with small numbers of flows. This was with pfifo_fast, not
sch_fq, at 100mbit?

Can this board actually drive a full gigabit in the first place? Until
now most of the low end arm boards I have seen only came with
a 100mbit mac, and the gig ones lacking offloads seemed to peak
out at about 600mbit.

Under my christmas tree landed a quad core A5 (odroid-c1), also an
xgene and zedboard - both of the latter are a-needing BQL,
and I haven't booted the udroid yet. Hopefully it is the
same driver you just improved.

(https://plus.google.com/u/0/107942175615993706558/posts/f1D43umhm7E )

> Signed-off-by: Beniamino Galvani <b.galvani@gmail.com>
> ---
>  drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++++++
>  1 file changed, 9 insertions(+)
>
> diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> index 118a427..c5af3d8 100644
> --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
> @@ -1097,6 +1097,7 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
>
>         priv->dirty_tx = 0;
>         priv->cur_tx = 0;
> +       netdev_reset_queue(priv->dev);
>
>         stmmac_clear_descriptors(priv);
>
> @@ -1300,6 +1301,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
>  static void stmmac_tx_clean(struct stmmac_priv *priv)
>  {
>         unsigned int txsize = priv->dma_tx_size;
> +       unsigned int bytes_compl = 0, pkts_compl = 0;
>
>         spin_lock(&priv->tx_lock);
>
> @@ -1356,6 +1358,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>                 priv->hw->mode->clean_desc3(priv, p);
>
>                 if (likely(skb != NULL)) {
> +                       pkts_compl++;
> +                       bytes_compl += skb->len;
>                         dev_consume_skb_any(skb);
>                         priv->tx_skbuff[entry] = NULL;
>                 }
> @@ -1364,6 +1368,9 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
>
>                 priv->dirty_tx++;
>         }
> +
> +       netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
> +
>         if (unlikely(netif_queue_stopped(priv->dev) &&
>                      stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) {
>                 netif_tx_lock(priv->dev);
> @@ -1418,6 +1425,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
>                                                      (i == txsize - 1));
>         priv->dirty_tx = 0;
>         priv->cur_tx = 0;
> +       netdev_reset_queue(priv->dev);
>         priv->hw->dma->start_tx(priv->ioaddr);
>
>         priv->dev->stats.tx_errors++;
> @@ -2049,6 +2057,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
>                 skb_tx_timestamp(skb);
>
>         priv->hw->dma->enable_dma_transmission(priv->ioaddr);
> +       netdev_sent_queue(dev, skb->len);
>
>         spin_unlock(&priv->tx_lock);
>         return NETDEV_TX_OK;
> --
> 2.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Dave Täht

thttp://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks

^ permalink raw reply

* RE: [scsi/net-next]Pull csiostor from net-next
From: Praveen Madhavan @ 2014-12-28 15:13 UTC (permalink / raw)
  To: hch@infradead.org; +Cc: linux-scsi@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <20141227150917.GA16964@infradead.org>

> How much do you plan to send for the 3.20 window?  I'd rather avoid
> having to pull in the net-next tree and merge everything through Dave
> if that seems feasible.
I hv couple of patches fixes for 3.20 window. Can you please pull from linux-next tree ?

________________________________________
From: linux-scsi-owner@vger.kernel.org [linux-scsi-owner@vger.kernel.org] on behalf of hch@infradead.org [hch@infradead.org]
Sent: Saturday, December 27, 2014 8:39 PM
To: Praveen Madhavan
Cc: linux-scsi@vger.kernel.org; netdev@vger.kernel.org
Subject: Re: [scsi/net-next]Pull csiostor from net-next

[fixing the netdev address]

On Sat, Dec 27, 2014 at 06:59:56AM -0800, hch@infradead.org wrote:
> On Wed, Dec 24, 2014 at 05:51:27PM +0000, Praveen Madhavan wrote:
> > Hi Christoph,
> > Can u please pull the csiostor changes from net-next ?. I need these changes to submit next fixes in scsi tree.
>
> How much do you plan to send for the 3.20 window?  I'd rather avoid
> having to pull in the net-next tree and merge everything through Dave
> if that seems feasible.
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
---end quoted text---
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


^ permalink raw reply

* [PATCH net-next] net: stmmac: add BQL support
From: Beniamino Galvani @ 2014-12-28 14:57 UTC (permalink / raw)
  To: David S. Miller
  Cc: Giuseppe Cavallaro, netdev, linux-kernel, Beniamino Galvani

Add support for Byte Queue Limits to the STMicro MAC driver.

Tested on a Amlogic S805 Cortex-A5 board, where the use of BQL
slightly decreases the ping latency from ~10ms to ~3ms when the
100Mbps link is saturated by TCP streams. No difference is
observed at 1Gbps.

Signed-off-by: Beniamino Galvani <b.galvani@gmail.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 118a427..c5af3d8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1097,6 +1097,7 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
+	netdev_reset_queue(priv->dev);
 
 	stmmac_clear_descriptors(priv);
 
@@ -1300,6 +1301,7 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv)
 static void stmmac_tx_clean(struct stmmac_priv *priv)
 {
 	unsigned int txsize = priv->dma_tx_size;
+	unsigned int bytes_compl = 0, pkts_compl = 0;
 
 	spin_lock(&priv->tx_lock);
 
@@ -1356,6 +1358,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 		priv->hw->mode->clean_desc3(priv, p);
 
 		if (likely(skb != NULL)) {
+			pkts_compl++;
+			bytes_compl += skb->len;
 			dev_consume_skb_any(skb);
 			priv->tx_skbuff[entry] = NULL;
 		}
@@ -1364,6 +1368,9 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 
 		priv->dirty_tx++;
 	}
+
+	netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
+
 	if (unlikely(netif_queue_stopped(priv->dev) &&
 		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) {
 		netif_tx_lock(priv->dev);
@@ -1418,6 +1425,7 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 						     (i == txsize - 1));
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
+	netdev_reset_queue(priv->dev);
 	priv->hw->dma->start_tx(priv->ioaddr);
 
 	priv->dev->stats.tx_errors++;
@@ -2049,6 +2057,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		skb_tx_timestamp(skb);
 
 	priv->hw->dma->enable_dma_transmission(priv->ioaddr);
+	netdev_sent_queue(dev, skb->len);
 
 	spin_unlock(&priv->tx_lock);
 	return NETDEV_TX_OK;
-- 
2.1.4

^ permalink raw reply related

* refactoring cdc_ncm
From: Enrico Mioso @ 2014-12-28 14:53 UTC (permalink / raw)
  To: Bjørn Mork; +Cc: linux-usb@vger.kernel.org, netdev@vger.kernel.org
In-Reply-To: <8761dqjuuh.fsf@nemi.mork.no>

Hello everyone, hello Bjorn.
Sorry for my prevous private mail, didn't think about it.

So I was wondering how it could be possible to refactor the cdc_ncm.c driver to 
queue frames and only when enough data was collected, generate a full NCM 
packet.

so I am trying to get clear what's the way to go.
My idea was to try to not change the layout of the code so much: I would like 
not breaking cdc_mbim.c and other code sharing some of these functions, so all 
of the work would be done in the cdc_ncm_fill_tx_frame function.

Before starting with code, I would like to organize ideas:
- when an SKB comes in, if the queue isn't empty, I would queue it
   Somethink like
   if (skb_queue_empty(ctx->skb_tx_queue)){
     skb_insert(skb);
   } else {
     ready2send=1;
   }
- at this point, performs some check to see if we have enough data:how can I do
   so? What should I check? Sizes of nth16 + ntb16 + ... ?

   - if not enough data is present, exit the function returning NULL
   else
   - invoke some functions to prepare the NCM packet

I plan to move / rewrite the needed code to isolate it completely from the 
queue logic: making it also a little bit more clear. I would like to have 
separate functions for any parts of the NCM packet construction.
This would allow better flexibility and probably less maintenance burden in the 
long run, and might open the road to extending the driver to support 32-bit NCM 
and different signness handling.

In the current code, we are carrying around the "sign" variable in the 
cdc_ncm_fill_tx_frame: why? Can different NCM packets have different sign 
settings?
Thank you guys, waiting for your replies,
Enrico

^ permalink raw reply

* [PATCH iproute2] man ss: Add state filter description
From: Vadim Kochan @ 2014-12-28 11:39 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=y, Size: 2191 bytes --]

From: Vadim Kochan <vadim4j@gmail.com>

Stolen from generated doc/ss.html
Also added reference to RFC 739 for TCP states.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 man/man8/ss.8 | 41 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 39 insertions(+), 2 deletions(-)

diff --git a/man/man8/ss.8 b/man/man8/ss.8
index 2a4bbc5..f4b709b 100644
--- a/man/man8/ss.8
+++ b/man/man8/ss.8
@@ -127,8 +127,41 @@ Do not display anything, just dump raw information about TCP sockets to FILE aft
 Read filter information from FILE.
 Each line of FILE is interpreted like single command line option. If FILE is - stdin is used.
 .TP
-.B FILTER := [ state TCP-STATE ] [ EXPRESSION ]
+.B FILTER := [ state STATE-FILTER ] [ EXPRESSION ]
 Please take a look at the official documentation (Debian package iproute-doc) for details regarding filters.
+
+.SH STATE-FILTER
+
+.B STATE-FILTER
+allows to construct arbitrary set of states to match. Its syntax is sequence of keywords state and exclude followed by identifier of state.
+.TP
+Available identifiers are:
+
+All standard TCP states:
+.BR established ", " syn-sent ", " syn-recv ", " fin-wait-1 ", " fin-wait-2 ", " time-wait ", " closed ", " close-wait ", " last-ack ", "
+.BR  listen " and " closing.
+
+.B all
+- for all the states
+
+.B connected
+- all the states except for
+.BR listen " and " closed
+
+.B synchronized
+- all the
+.B connected
+states except for
+.B syn-sent
+
+.B bucket
+- states, which are maintained as minisockets, i.e.
+.BR time-wait " and " syn-recv
+
+.B big
+- opposite to
+.B bucket
+
 .SH USAGE EXAMPLES
 .TP
 .B ss -t -a
@@ -150,7 +183,11 @@ Find all local processes connected to X server.
 List all the tcp sockets in state FIN-WAIT-1 for our apache to network 193.233.7/24 and look at their timers.
 .SH SEE ALSO
 .BR ip (8),
-.BR /usr/share/doc/iproute-doc/ss.html " (package iproute­doc)"
+.BR /usr/share/doc/iproute-doc/ss.html " (package iproute­doc)",
+.br
+.BR RFC " 793 "
+- https://tools.ietf.org/rfc/rfc793.txt (TCP states) 
+
 .SH AUTHOR
 .I ss 
 was written by Alexey Kuznetosv, <kuznet@ms2.inr.ac.ru>.
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2] man tc: Add description for -graph option
From: Vadim Kochan @ 2014-12-28 10:33 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 man/man8/tc.8 | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/man/man8/tc.8 b/man/man8/tc.8
index d8f974f..a6aed0a 100644
--- a/man/man8/tc.8
+++ b/man/man8/tc.8
@@ -70,7 +70,8 @@ DEV
 \fB\-d\fR[\fIetails\fR] |
 \fB\-r\fR[\fIaw\fR] |
 \fB\-p\fR[\fIretty\fR] |
-\fB\-i\fR[\fIec\fR] }
+\fB\-i\fR[\fIec\fR] |
+\fB\-g\fR[\fIraph\fR] }
 
 .SH DESCRIPTION
 .B Tc
@@ -475,6 +476,25 @@ decode filter offset and mask values to equivalent filter commands based on TCP/
 .BR "\-iec"
 print rates in IEC units (ie. 1K = 1024).
 
+.TP
+.BR "\-g", " \-graph"
+shows classes as ASCII graph. Prints generic stats info under each class if
+.BR "-s"
+option was specified. Classes can be filtered only by
+.BR "dev"
+option.
+
+.SH "EXAMPLES"
+.PP
+tc -g class show dev eth0
+.RS 4
+Shows classes as ASCII graph on eth0 interface.
+.RE
+.PP
+tc -g -s class show dev eth0
+.RS 4
+Shows classes as ASCII graph with stats info under each class.
+
 .SH HISTORY
 .B tc
 was written by Alexey N. Kuznetsov and added in Linux 2.2.
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2] ip: Small corrections of '-tshort' option in usage
From: Vadim Kochan @ 2014-12-28  9:47 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

Fixed -t[short] to -ts[hort] as '-t' is related to
-timestamp option.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 ip/ip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ip/ip.c b/ip/ip.c
index 61cc9c3..850a001 100644
--- a/ip/ip.c
+++ b/ip/ip.c
@@ -54,7 +54,7 @@ static void usage(void)
 "                    -f[amily] { inet | inet6 | ipx | dnet | bridge | link } |\n"
 "                    -4 | -6 | -I | -D | -B | -0 |\n"
 "                    -l[oops] { maximum-addr-flush-attempts } |\n"
-"                    -o[neline] | -t[imestamp] | -t[short] | -b[atch] [filename] |\n"
+"                    -o[neline] | -t[imestamp] | -ts[hort] | -b[atch] [filename] |\n"
 "                    -rc[vbuf] [size] | -n[etns] name }\n");
 	exit(-1);
 }
-- 
2.1.3

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox