Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH net-next 4/4] geneve: Check family when reusing sockets.
From: Jesse Gross @ 2015-01-03  2:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <1420251965-44794-1-git-send-email-jesse@nicira.com>

When searching for an existing socket to reuse, the address family
is not taken into account - only port number. This means that an
IPv4 socket could be used for IPv6 traffic and vice versa, which
is sure to cause problems when passing packets.

It is not possible to trigger this problem currently because the
only user of Geneve creates just IPv4 sockets. However, that is
likely to change in the near future.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 net/ipv4/geneve.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 4fe5a59..5b52046 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -65,14 +65,15 @@ static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
 	return (struct genevehdr *)(udp_hdr(skb) + 1);
 }

-/* Find geneve socket based on network namespace and UDP port */
-static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port)
+static struct geneve_sock *geneve_find_sock(struct net *net,
+					    sa_family_t family, __be16 port)
 {
 	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;

 	list_for_each_entry(gs, &gn->sock_list, list) {
-		if (inet_sk(gs->sock->sk)->inet_sport == port)
+		if (inet_sk(gs->sock->sk)->inet_sport == port &&
+		    inet_sk(gs->sock->sk)->sk.sk_family == family)
 			return gs;
 	}

@@ -375,7 +376,7 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,

 	mutex_lock(&geneve_mutex);

-	gs = geneve_find_sock(net, port);
+	gs = geneve_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port);
 	if (gs) {
 		if (!no_share && gs->rcv == rcv)
 			gs->refcnt++;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 2/4] geneve: Simplify locking.
From: Jesse Gross @ 2015-01-03  2:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <1420251965-44794-1-git-send-email-jesse@nicira.com>

The existing Geneve locking scheme was pulled over directly from
VXLAN. However, VXLAN has a number of built in mechanisms which make
the locking more complex and are unlikely to be necessary with Geneve.
This simplifies the locking to use a basic scheme of a mutex
when doing updates plus RCU on receive.

In addition to making the code easier to read, this also avoids the
possibility of a race when creating or destroying sockets since
UDP sockets and the list of Geneve sockets are protected by different
locks. After this change, the entire operation is atomic.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/net/geneve.h |  2 +-
 net/ipv4/geneve.c    | 59 +++++++++++++++++++++++-----------------------------
 2 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/include/net/geneve.h b/include/net/geneve.h
index 56c7e1a..b40f4af 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -73,7 +73,7 @@ struct geneve_sock {
 	void			*rcv_data;
 	struct socket		*sock;
 	struct rcu_head		rcu;
-	atomic_t		refcnt;
+	int			refcnt;
 	struct udp_offload	udp_offloads;
 };
 
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 136a829..ad8dbae 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -17,7 +17,7 @@
 #include <linux/errno.h>
 #include <linux/slab.h>
 #include <linux/skbuff.h>
-#include <linux/rculist.h>
+#include <linux/list.h>
 #include <linux/netdevice.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -28,6 +28,7 @@
 #include <linux/if_vlan.h>
 #include <linux/hash.h>
 #include <linux/ethtool.h>
+#include <linux/mutex.h>
 #include <net/arp.h>
 #include <net/ndisc.h>
 #include <net/ip.h>
@@ -50,13 +51,15 @@
 #include <net/ip6_checksum.h>
 #endif
 
+/* Protects sock_list and refcounts. */
+static DEFINE_MUTEX(geneve_mutex);
+
 #define PORT_HASH_BITS 8
 #define PORT_HASH_SIZE (1<<PORT_HASH_BITS)
 
 /* per-network namespace private data for this module */
 struct geneve_net {
 	struct hlist_head	sock_list[PORT_HASH_SIZE];
-	spinlock_t		sock_lock;   /* Protects sock_list */
 };
 
 static int geneve_net_id;
@@ -78,7 +81,7 @@ static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port)
 {
 	struct geneve_sock *gs;
 
-	hlist_for_each_entry_rcu(gs, gs_head(net, port), hlist) {
+	hlist_for_each_entry(gs, gs_head(net, port), hlist) {
 		if (inet_sk(gs->sock->sk)->inet_sport == port)
 			return gs;
 	}
@@ -336,7 +339,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 						geneve_rcv_t *rcv, void *data,
 						bool ipv6)
 {
-	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 	struct socket *sock;
 	struct udp_tunnel_sock_cfg tunnel_cfg;
@@ -352,7 +354,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 	}
 
 	gs->sock = sock;
-	atomic_set(&gs->refcnt, 1);
+	gs->refcnt = 1;
 	gs->rcv = rcv;
 	gs->rcv_data = data;
 
@@ -360,11 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 	gs->udp_offloads.port = port;
 	gs->udp_offloads.callbacks.gro_receive  = geneve_gro_receive;
 	gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete;
-
-	spin_lock(&gn->sock_lock);
-	hlist_add_head_rcu(&gs->hlist, gs_head(net, port));
 	geneve_notify_add_rx_port(gs);
-	spin_unlock(&gn->sock_lock);
 
 	/* Mark socket as an encapsulation socket */
 	tunnel_cfg.sk_user_data = gs;
@@ -373,6 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 	tunnel_cfg.encap_destroy = NULL;
 	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
 
+	hlist_add_head(&gs->hlist, gs_head(net, port));
+
 	return gs;
 }
 
@@ -380,25 +380,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port,
 				    geneve_rcv_t *rcv, void *data,
 				    bool no_share, bool ipv6)
 {
-	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 
-	gs = geneve_socket_create(net, port, rcv, data, ipv6);
-	if (!IS_ERR(gs))
-		return gs;
-
-	if (no_share)	/* Return error if sharing is not allowed. */
-		return ERR_PTR(-EINVAL);
+	mutex_lock(&geneve_mutex);
 
-	spin_lock(&gn->sock_lock);
 	gs = geneve_find_sock(net, port);
-	if (gs && ((gs->rcv != rcv) ||
-		   !atomic_add_unless(&gs->refcnt, 1, 0)))
+	if (gs) {
+		if (!no_share && gs->rcv == rcv)
+			gs->refcnt++;
+		else
 			gs = ERR_PTR(-EBUSY);
-	spin_unlock(&gn->sock_lock);
+	} else {
+		gs = geneve_socket_create(net, port, rcv, data, ipv6);
+	}
 
-	if (!gs)
-		gs = ERR_PTR(-EINVAL);
+	mutex_unlock(&geneve_mutex);
 
 	return gs;
 }
@@ -406,19 +402,18 @@ EXPORT_SYMBOL_GPL(geneve_sock_add);
 
 void geneve_sock_release(struct geneve_sock *gs)
 {
-	struct net *net = sock_net(gs->sock->sk);
-	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	mutex_lock(&geneve_mutex);
 
-	if (!atomic_dec_and_test(&gs->refcnt))
-		return;
+	if (--gs->refcnt)
+		goto unlock;
 
-	spin_lock(&gn->sock_lock);
-	hlist_del_rcu(&gs->hlist);
+	hlist_del(&gs->hlist);
 	geneve_notify_del_rx_port(gs);
-	spin_unlock(&gn->sock_lock);
-
 	udp_tunnel_sock_release(gs->sock);
 	kfree_rcu(gs, rcu);
+
+unlock:
+	mutex_unlock(&geneve_mutex);
 }
 EXPORT_SYMBOL_GPL(geneve_sock_release);
 
@@ -427,8 +422,6 @@ static __net_init int geneve_init_net(struct net *net)
 	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	unsigned int h;
 
-	spin_lock_init(&gn->sock_lock);
-
 	for (h = 0; h < PORT_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&gn->sock_list[h]);
 
@@ -454,7 +447,7 @@ static int __init geneve_init_module(void)
 
 	return 0;
 }
-late_initcall(geneve_init_module);
+module_init(geneve_init_module);
 
 static void __exit geneve_cleanup_module(void)
 {
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 0/4] Geneve Cleanups
From: Jesse Gross @ 2015-01-03  2:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

Much of the basis for the Geneve code comes from VXLAN. However,
Geneve is quite a bit simpler than VXLAN and so this cleans up a
lot of the infrastructure - particularly around locking - where the
extra complexity is not necessary.

Jesse Gross (4):
  geneve: Remove workqueue.
  geneve: Simplify locking.
  geneve: Remove socket hash table.
  geneve: Check family when reusing sockets.

 include/net/geneve.h |   5 +--
 net/ipv4/geneve.c    | 101 +++++++++++++++++----------------------------------
 2 files changed, 35 insertions(+), 71 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH net-next 1/4] geneve: Remove workqueue.
From: Jesse Gross @ 2015-01-03  2:26 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <1420251965-44794-1-git-send-email-jesse@nicira.com>

The work queue is used only to free the UDP socket upon destruction.
This is not necessary with Geneve and generally makes the code more
difficult to reason about. It also introduces nondeterministic
behavior such as when a socket is rapidly deleted and recreated, which
could fail as the deletion happens asynchronously.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/net/geneve.h |  1 -
 net/ipv4/geneve.c    | 21 ++-------------------
 2 files changed, 2 insertions(+), 20 deletions(-)

diff --git a/include/net/geneve.h b/include/net/geneve.h
index 112132c..56c7e1a 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -71,7 +71,6 @@ struct geneve_sock {
 	struct hlist_node	hlist;
 	geneve_rcv_t		*rcv;
 	void			*rcv_data;
-	struct work_struct	del_work;
 	struct socket		*sock;
 	struct rcu_head		rcu;
 	atomic_t		refcnt;
diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index 19e256e..136a829 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c
@@ -61,8 +61,6 @@ struct geneve_net {
 
 static int geneve_net_id;
 
-static struct workqueue_struct *geneve_wq;
-
 static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
 {
 	return (struct genevehdr *)(udp_hdr(skb) + 1);
@@ -307,15 +305,6 @@ error:
 	return 1;
 }
 
-static void geneve_del_work(struct work_struct *work)
-{
-	struct geneve_sock *gs = container_of(work, struct geneve_sock,
-					      del_work);
-
-	udp_tunnel_sock_release(gs->sock);
-	kfree_rcu(gs, rcu);
-}
-
 static struct socket *geneve_create_sock(struct net *net, bool ipv6,
 					 __be16 port)
 {
@@ -356,8 +345,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
 	if (!gs)
 		return ERR_PTR(-ENOMEM);
 
-	INIT_WORK(&gs->del_work, geneve_del_work);
-
 	sock = geneve_create_sock(net, ipv6, port);
 	if (IS_ERR(sock)) {
 		kfree(gs);
@@ -430,7 +417,8 @@ void geneve_sock_release(struct geneve_sock *gs)
 	geneve_notify_del_rx_port(gs);
 	spin_unlock(&gn->sock_lock);
 
-	queue_work(geneve_wq, &gs->del_work);
+	udp_tunnel_sock_release(gs->sock);
+	kfree_rcu(gs, rcu);
 }
 EXPORT_SYMBOL_GPL(geneve_sock_release);
 
@@ -458,10 +446,6 @@ static int __init geneve_init_module(void)
 {
 	int rc;
 
-	geneve_wq = alloc_workqueue("geneve", 0, 0);
-	if (!geneve_wq)
-		return -ENOMEM;
-
 	rc = register_pernet_subsys(&geneve_net_ops);
 	if (rc)
 		return rc;
@@ -474,7 +458,6 @@ late_initcall(geneve_init_module);
 
 static void __exit geneve_cleanup_module(void)
 {
-	destroy_workqueue(geneve_wq);
 	unregister_pernet_subsys(&geneve_net_ops);
 }
 module_exit(geneve_cleanup_module);
-- 
1.9.1

^ permalink raw reply related

* [PATCH] ethtool: Extend ethtool plugin module eeprom API to phylib
From: Ed Swierk @ 2015-01-03  1:27 UTC (permalink / raw)
  To: netdev; +Cc: f.fainelli, linux-kernel, Ed Swierk

This patch extends the ethtool plugin module eeprom API to support cards
whose phy support is delegated to a separate driver.

The handlers for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPROM call the
module_info and module_eeprom functions if the phy driver provides them;
otherwise the handlers call the equivalent ethtool_ops functions provided
by network drivers with built-in phy support.

Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
---
 include/linux/phy.h |  9 +++++++++
 net/core/ethtool.c  | 45 ++++++++++++++++++++++++++++++++++-----------
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 565188c..04e5f5c 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -472,6 +472,15 @@ struct phy_driver {
 	/* See set_wol, but for checking whether Wake on LAN is enabled. */
 	void (*get_wol)(struct phy_device *dev, struct ethtool_wolinfo *wol);
 
+	/* Get the size and type of the eeprom contained within a plug-in
+	 * module */
+	int (*module_info)(struct phy_device *dev,
+			   struct ethtool_modinfo *modinfo);
+
+	/* Get the eeprom information from the plug-in module */
+	int (*module_eeprom)(struct phy_device *dev,
+			     struct ethtool_eeprom *ee, u8 *data);
+
 	struct device_driver driver;
 };
 #define to_phy_driver(d) container_of(d, struct phy_driver, driver)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 30071de..466526b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1405,20 +1405,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr)
 	return err;
 }
 
+static int __ethtool_get_module_info(struct net_device *dev,
+				     struct ethtool_modinfo *modinfo)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct phy_device *phydev = dev->phydev;
+
+	if (phydev && phydev->drv && phydev->drv->module_info)
+		return phydev->drv->module_info(phydev, modinfo);
+
+	if (ops->get_module_info)
+		return ops->get_module_info(dev, modinfo);
+
+	return -EOPNOTSUPP;
+}
+
 static int ethtool_get_module_info(struct net_device *dev,
 				   void __user *useraddr)
 {
 	int ret;
 	struct ethtool_modinfo modinfo;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops->get_module_info)
-		return -EOPNOTSUPP;
 
 	if (copy_from_user(&modinfo, useraddr, sizeof(modinfo)))
 		return -EFAULT;
 
-	ret = ops->get_module_info(dev, &modinfo);
+	ret = __ethtool_get_module_info(dev, &modinfo);
 	if (ret)
 		return ret;
 
@@ -1428,21 +1439,33 @@ static int ethtool_get_module_info(struct net_device *dev,
 	return 0;
 }
 
+static int __ethtool_get_module_eeprom(struct net_device *dev,
+				       struct ethtool_eeprom *ee, u8 *data)
+{
+	const struct ethtool_ops *ops = dev->ethtool_ops;
+	struct phy_device *phydev = dev->phydev;
+
+	if (phydev && phydev->drv && phydev->drv->module_eeprom)
+		return phydev->drv->module_eeprom(phydev, ee, data);
+
+	if (ops->get_module_eeprom)
+		return ops->get_module_eeprom(dev, ee, data);
+
+	return -EOPNOTSUPP;
+}
+
 static int ethtool_get_module_eeprom(struct net_device *dev,
 				     void __user *useraddr)
 {
 	int ret;
 	struct ethtool_modinfo modinfo;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops->get_module_info || !ops->get_module_eeprom)
-		return -EOPNOTSUPP;
 
-	ret = ops->get_module_info(dev, &modinfo);
+	ret = __ethtool_get_module_info(dev, &modinfo);
 	if (ret)
 		return ret;
 
-	return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom,
+	return ethtool_get_any_eeprom(dev, useraddr,
+				      __ethtool_get_module_eeprom,
 				      modinfo.eeprom_len);
 }
 
-- 
1.9.1

^ permalink raw reply related

* Re: [PATCH iproute2 v2] bridge/link: add learning_sync policy flag
From: Siva Mannem @ 2015-01-03  1:23 UTC (permalink / raw)
  To: Scott Feldman
  Cc: stephen@networkplumber.org, Netdev, Jiří Pírko,
	Roopa Prabhu
In-Reply-To: <CAE4R7bBpmR429F--8NNx0Jqos_bR-x2G4ybjmPhDW6Q+kV5y2g@mail.gmail.com>

> I think the aging settings are per-bridge, not per-bridge-port, so the
> policy control you're talking about wouldn't end up here on
> /sbin/bridge link.
>
Agree.

> However, I would argue even with hardware aging capability, we still
> should use Linux for aging since all the controls are already there
> and it just works.  It keeps the swdev model simple and the swdev
> driver simple.  Do you have a counter-argument for why enabling
> hardware aging would be better?

When traffic is being forwarded in hardware and the entries are to
be software aged, the aging logic needs to periodically (once every
configured age interval) retrieve the FDB entry's hit bit from the
hardware. If this bit is not set, it deletes the entry. If the hit bit
is set, it clears it. This could be CPU intensive if the FDB size runs
into a few thousand entries.

If hardware aging is enabled, the above job is offloaded to hardware.

The current aging logic does not differentiate between software
forwarded and hardware forwarded entries. When the traffic is being
forwarded in hardware, the aging logic is deleting the entries after
the age interval.

So the solution could be

1) When hardware aging is enabled, only the notifications (probably via
br_fdb_external_learn_delete()) delete the fdb entries.
2) When software aging is enabled, aging logic needs to differentiate
between software forwarded and hardware forwarded entries. For entries
forwarded in hardware, the above aging logic needs to be implemented.

On Fri, Jan 2, 2015 at 11:27 PM, Scott Feldman <sfeldma@gmail.com> wrote:
> On Fri, Jan 2, 2015 at 9:14 AM, Siva Mannem <siva.mannem.lnx@gmail.com> wrote:
>> Hi,
>>
>>
>>> +.BR "learning_sync on " or " learning_sync off "
>>> +Controls whether a given port will sync MAC addresses learned on device port to
>>> +bridge FDB.
>>> +
>>
>> For the FDB entries synced from device port to bridge FDB, can the
>> device port also mention that it will take care of aging the synced
>> entries? I am thinking of a use case where the port supports hardware
>> learning and hardware aging?
>
> I think the aging settings are per-bridge, not per-bridge-port, so the
> policy control you're talking about wouldn't end up here on
> /sbin/bridge link.
>
> However, I would argue even with hardware aging capability, we still
> should use Linux for aging since all the controls are already there
> and it just works.  It keeps the swdev model simple and the swdev
> driver simple.  Do you have a counter-argument for why enabling
> hardware aging would be better?
>
> -scott

^ permalink raw reply

* [PATCH iproute2 3/3] ss: Filtering logic changing, with fixes
From: Vadim Kochan @ 2015-01-03  0:44 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan
In-Reply-To: <1420245877-11763-1-git-send-email-vadim4j@gmail.com>

From: Vadim Kochan <vadim4j@gmail.com>

This patch fixes some issues with filtering combinations which do not
work on the 'master' version:

    $ ss -4
    shows inet & unix sockets, instead of only inet sockets

    $ ss -u
    needs to specify 'state closed'

    $ ss src unix:*X11*
    needs to specify '-x' shortcut for UNIX family

    $ ss -A all
    shows only sockets with established states

There might be some other issues which were not observed.

Also changed the logic for calculating family, socket type and
state filtering. I think that this version is a little simpler.
Now there are 2 predefined default tables which describe
the following mapping:

    family  -> (states, dbs)
    db      -> (states, families)

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 misc/ss.c | 315 +++++++++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 210 insertions(+), 105 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index c68af19..31f316e 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -128,6 +128,7 @@ enum
 #define PACKET_DBM ((1<<PACKET_DG_DB)|(1<<PACKET_R_DB))
 #define UNIX_DBM ((1<<UNIX_DG_DB)|(1<<UNIX_ST_DB)|(1<<UNIX_SQ_DB))
 #define ALL_DB ((1<<MAX_DB)-1)
+#define INET_DBM ((1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB)|(1<<RAW_DB))
 
 enum {
 	SS_UNKNOWN,
@@ -145,7 +146,8 @@ enum {
 	SS_MAX
 };
 
-#define SS_ALL ((1<<SS_MAX)-1)
+#define SS_ALL ((1 << SS_MAX) - 1)
+#define SS_CONN (SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)))
 
 #include "ssfilter.h"
 
@@ -157,14 +159,120 @@ struct filter
 	struct ssfilter *f;
 };
 
+struct filter default_dbs[MAX_DB] = {
+	[TCP_DB] = {
+		.states   = SS_CONN,
+		.families = (1 << AF_INET) | (1 << AF_INET6),
+	},
+	[DCCP_DB] = {
+		.states   = SS_CONN,
+		.families = (1 << AF_INET) | (1 << AF_INET6),
+	},
+	[UDP_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_INET) | (1 << AF_INET6),
+	},
+	[RAW_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_INET) | (1 << AF_INET6),
+	},
+	[UNIX_DG_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_UNIX),
+	},
+	[UNIX_ST_DB] = {
+		.states   = SS_CONN,
+		.families = (1 << AF_UNIX),
+	},
+	[UNIX_SQ_DB] = {
+		.states   = SS_CONN,
+		.families = (1 << AF_UNIX),
+	},
+	[PACKET_DG_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_PACKET),
+	},
+	[PACKET_R_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_PACKET),
+	},
+	[NETLINK_DB] = {
+		.states   = (1 << SS_CLOSE),
+		.families = (1 << AF_NETLINK),
+	},
+};
+
+struct filter default_afs[AF_MAX] = {
+	[AF_INET] = {
+		.dbs = INET_DBM,
+		.states = SS_CONN,
+	},
+	[AF_INET6] = {
+		.dbs = INET_DBM,
+		.states = SS_CONN,
+	},
+	[AF_UNIX] = {
+		.dbs = UNIX_DBM,
+		.states = SS_CONN,
+	},
+	[AF_PACKET] = {
+		.dbs = PACKET_DBM,
+		.states = (1 << SS_CLOSE),
+	},
+	[AF_NETLINK] = {
+		.dbs = (1 << NETLINK_DB),
+		.states = (1 << SS_CLOSE),
+	},
+};
+
+static int do_default = 1;
 struct filter default_filter = {
-	.dbs	=  ~0,
-	.states = SS_ALL & ~((1<<SS_LISTEN)|(1<<SS_CLOSE)|(1<<SS_TIME_WAIT)|(1<<SS_SYN_RECV)),
-	.families= (1<<AF_INET)|(1<<AF_INET6),
+	.dbs	  =  ~0,
+	.states   = SS_CONN,
+	.families = (1 << AF_INET) | (1 << AF_INET6) | (1 << AF_UNIX),
 };
 
 struct filter current_filter;
 
+static void filter_db_set(struct filter *f, int db)
+{
+	f->states   |= default_dbs[db].states;
+	f->families |= default_dbs[db].families;
+	f->dbs	    |= 1 << db;
+	do_default   = 0;
+}
+
+static void filter_af_set(struct filter *f, int af)
+{
+	f->dbs	    |= default_afs[af].dbs;
+	f->states   |= default_afs[af].states;
+	f->families |= 1 << af;
+	do_default   = 0;
+}
+
+static int filter_af_get(struct filter *f, int af)
+{
+	return f->families & (1 << af);
+}
+
+static void filter_merge(struct filter *to, struct filter *from)
+{
+	if (to->families)
+		to->families = (to->families | from->families) & to->families;
+	else
+		to->families |= from->families;
+
+	if (from->dbs)
+		to->dbs = (to->dbs | from->dbs) & from->dbs;
+	else
+		to->dbs |= from->dbs;
+
+	if (from->states)
+		to->states = (to->states | from->states) & from->states;
+	else
+		to->states |= from->states;
+}
+
 static FILE *generic_proc_open(const char *env, const char *name)
 {
 	const char *p = getenv(env);
@@ -1171,12 +1279,13 @@ void *parse_hostcond(char *addr)
 	char *port = NULL;
 	struct aafilter a;
 	struct aafilter *res;
-	int fam = preferred_family;
+	int fam = 0;
+	struct filter *f = &current_filter;
 
 	memset(&a, 0, sizeof(a));
 	a.port = -1;
 
-	if (fam == AF_UNIX || strncmp(addr, "unix:", 5) == 0) {
+	if (filter_af_get(f, AF_UNIX) || strncmp(addr, "unix:", 5) == 0) {
 		char *p;
 		a.addr.family = AF_UNIX;
 		if (strncmp(addr, "unix:", 5) == 0)
@@ -1184,10 +1293,11 @@ void *parse_hostcond(char *addr)
 		p = strdup(addr);
 		a.addr.bitlen = 8*strlen(p);
 		memcpy(a.addr.data, &p, sizeof(p));
+		fam = AF_UNIX;
 		goto out;
 	}
 
-	if (fam == AF_PACKET || strncmp(addr, "link:", 5) == 0) {
+	if (filter_af_get(f, AF_PACKET) || strncmp(addr, "link:", 5) == 0) {
 		a.addr.family = AF_PACKET;
 		a.addr.bitlen = 0;
 		if (strncmp(addr, "link:", 5) == 0)
@@ -1209,10 +1319,11 @@ void *parse_hostcond(char *addr)
 				return NULL;
 			a.addr.data[0] = ntohs(tmp);
 		}
+		fam = AF_PACKET;
 		goto out;
 	}
 
-	if (fam == AF_NETLINK || strncmp(addr, "netlink:", 8) == 0) {
+	if (filter_af_get(f, AF_NETLINK) || strncmp(addr, "netlink:", 8) == 0) {
 		a.addr.family = AF_NETLINK;
 		a.addr.bitlen = 0;
 		if (strncmp(addr, "netlink:", 8) == 0)
@@ -1234,13 +1345,14 @@ void *parse_hostcond(char *addr)
 			if (nl_proto_a2n(&a.addr.data[0], addr) == -1)
 				return NULL;
 		}
+		fam = AF_NETLINK;
 		goto out;
 	}
 
-	if (strncmp(addr, "inet:", 5) == 0) {
+	if (filter_af_get(f, AF_INET) || !strncmp(addr, "inet:", 5)) {
 		addr += 5;
 		fam = AF_INET;
-	} else if (strncmp(addr, "inet6:", 6) == 0) {
+	} else if (filter_af_get(f, AF_INET6) || !strncmp(addr, "inet6:", 6)) {
 		addr += 6;
 		fam = AF_INET6;
 	}
@@ -1309,7 +1421,10 @@ void *parse_hostcond(char *addr)
 		}
 	}
 
-	out:
+out:
+	if (fam)
+		filter_af_set(f, fam);
+
 	res = malloc(sizeof(*res));
 	if (res)
 		memcpy(res, &a, sizeof(a));
@@ -2459,6 +2574,9 @@ static int unix_show(struct filter *f)
 	int  cnt;
 	struct unixstat *list = NULL;
 
+	if (!filter_af_get(f, AF_UNIX))
+		return 0;
+
 	if (!getenv("PROC_NET_UNIX") && !getenv("PROC_ROOT")
 	    && unix_show_netlink(f) == 0)
 		return 0;
@@ -2701,7 +2819,7 @@ static int packet_show(struct filter *f)
 {
 	FILE *fp;
 
-	if (preferred_family != AF_PACKET && !(f->states & (1 << SS_CLOSE)))
+	if (!filter_af_get(f, AF_PACKET) && !(f->states & (1 << SS_CLOSE)))
 		return 0;
 
 	if (!getenv("PROC_NET_PACKET") && !getenv("PROC_ROOT") &&
@@ -2869,7 +2987,7 @@ static int netlink_show(struct filter *f)
 	int rq, wq, rc;
 	unsigned long long sk, cb;
 
-	if (preferred_family != AF_NETLINK && !(f->states & (1 << SS_CLOSE)))
+	if (!filter_af_get(f, AF_NETLINK) && !(f->states & (1 << SS_CLOSE)))
 		return 0;
 
 	if (!getenv("PROC_NET_NETLINK") && !getenv("PROC_ROOT") &&
@@ -3132,7 +3250,9 @@ static int scan_state(const char *state)
 		if (strcasecmp(state, sstate_namel[i]) == 0)
 			return (1<<i);
 	}
-	return 0;
+
+	fprintf(stderr, "ss: wrong state name: %s\n", state);
+	exit(-1);
 }
 
 static const struct option long_opts[] = {
@@ -3170,13 +3290,14 @@ static const struct option long_opts[] = {
 
 int main(int argc, char *argv[])
 {
-	int do_default = 1;
 	int saw_states = 0;
 	int saw_query = 0;
 	int do_summary = 0;
 	const char *dump_tcpdiag = NULL;
 	FILE *filter_fp = NULL;
 	int ch;
+	struct filter dbs_filter = {};
+	struct filter state_filter = {};
 
 	memset(&current_filter, 0, sizeof(current_filter));
 
@@ -3213,55 +3334,51 @@ int main(int argc, char *argv[])
 			show_bpf++;
 			break;
 		case 'd':
-			current_filter.dbs |= (1<<DCCP_DB);
-			do_default = 0;
+			filter_db_set(&dbs_filter, DCCP_DB);
 			break;
 		case 't':
-			current_filter.dbs |= (1<<TCP_DB);
-			do_default = 0;
+			filter_db_set(&dbs_filter, TCP_DB);
 			break;
 		case 'u':
-			current_filter.dbs |= (1<<UDP_DB);
-			do_default = 0;
+			filter_db_set(&dbs_filter, UDP_DB);
 			break;
 		case 'w':
-			current_filter.dbs |= (1<<RAW_DB);
-			do_default = 0;
+			filter_db_set(&dbs_filter, RAW_DB);
 			break;
 		case 'x':
-			current_filter.dbs |= UNIX_DBM;
-			do_default = 0;
+			filter_af_set(&current_filter, AF_UNIX);
 			break;
 		case 'a':
-			current_filter.states = SS_ALL;
+			state_filter.states = SS_ALL;
 			break;
 		case 'l':
-			current_filter.states = (1<<SS_LISTEN) | (1<<SS_CLOSE);
+			state_filter.states = (1 << SS_LISTEN) | (1 << SS_CLOSE);
 			break;
 		case '4':
-			preferred_family = AF_INET;
+			filter_af_set(&current_filter, AF_INET);
 			break;
 		case '6':
-			preferred_family = AF_INET6;
+			filter_af_set(&current_filter, AF_INET6);
 			break;
 		case '0':
-			preferred_family = AF_PACKET;
+			filter_af_set(&current_filter, AF_PACKET);
 			break;
 		case 'f':
 			if (strcmp(optarg, "inet") == 0)
-				preferred_family = AF_INET;
+				filter_af_set(&current_filter, AF_INET);
 			else if (strcmp(optarg, "inet6") == 0)
-				preferred_family = AF_INET6;
+				filter_af_set(&current_filter, AF_INET6);
 			else if (strcmp(optarg, "link") == 0)
-				preferred_family = AF_PACKET;
+				filter_af_set(&current_filter, AF_PACKET);
 			else if (strcmp(optarg, "unix") == 0)
-				preferred_family = AF_UNIX;
+				filter_af_set(&current_filter, AF_UNIX);
 			else if (strcmp(optarg, "netlink") == 0)
-				preferred_family = AF_NETLINK;
+				filter_af_set(&current_filter, AF_NETLINK);
 			else if (strcmp(optarg, "help") == 0)
 				help();
 			else {
-				fprintf(stderr, "ss: \"%s\" is invalid family\n", optarg);
+				fprintf(stderr, "ss: \"%s\" is invalid family\n",
+						optarg);
 				usage();
 			}
 			break;
@@ -3278,38 +3395,53 @@ int main(int argc, char *argv[])
 				if ((p1 = strchr(p, ',')) != NULL)
 					*p1 = 0;
 				if (strcmp(p, "all") == 0) {
-					current_filter.dbs = ALL_DB;
+					filter_db_set(&dbs_filter, UDP_DB);
+					filter_db_set(&dbs_filter, DCCP_DB);
+					filter_db_set(&dbs_filter, TCP_DB);
+					filter_db_set(&dbs_filter, RAW_DB);
+					filter_db_set(&dbs_filter, UNIX_ST_DB);
+					filter_db_set(&dbs_filter, UNIX_DG_DB);
+					filter_db_set(&dbs_filter, UNIX_SQ_DB);
+					filter_db_set(&dbs_filter, PACKET_R_DB);
+					filter_db_set(&dbs_filter, PACKET_DG_DB);
+					filter_db_set(&dbs_filter, NETLINK_DB);
 				} else if (strcmp(p, "inet") == 0) {
-					current_filter.dbs |= (1<<TCP_DB)|(1<<DCCP_DB)|(1<<UDP_DB)|(1<<RAW_DB);
+					filter_db_set(&dbs_filter, UDP_DB);
+					filter_db_set(&dbs_filter, DCCP_DB);
+					filter_db_set(&dbs_filter, TCP_DB);
+					filter_db_set(&dbs_filter, RAW_DB);
 				} else if (strcmp(p, "udp") == 0) {
-					current_filter.dbs |= (1<<UDP_DB);
+					filter_db_set(&dbs_filter, UDP_DB);
 				} else if (strcmp(p, "dccp") == 0) {
-					current_filter.dbs |= (1<<DCCP_DB);
+					filter_db_set(&dbs_filter, DCCP_DB);
 				} else if (strcmp(p, "tcp") == 0) {
-					current_filter.dbs |= (1<<TCP_DB);
+					filter_db_set(&dbs_filter, TCP_DB);
 				} else if (strcmp(p, "raw") == 0) {
-					current_filter.dbs |= (1<<RAW_DB);
+					filter_db_set(&dbs_filter, RAW_DB);
 				} else if (strcmp(p, "unix") == 0) {
-					current_filter.dbs |= UNIX_DBM;
+					filter_db_set(&dbs_filter, UNIX_ST_DB);
+					filter_db_set(&dbs_filter, UNIX_DG_DB);
+					filter_db_set(&dbs_filter, UNIX_SQ_DB);
 				} else if (strcasecmp(p, "unix_stream") == 0 ||
 					   strcmp(p, "u_str") == 0) {
-					current_filter.dbs |= (1<<UNIX_ST_DB);
+					filter_db_set(&dbs_filter, UNIX_ST_DB);
 				} else if (strcasecmp(p, "unix_dgram") == 0 ||
 					   strcmp(p, "u_dgr") == 0) {
-					current_filter.dbs |= (1<<UNIX_DG_DB);
+					filter_db_set(&dbs_filter, UNIX_DG_DB);
 				} else if (strcasecmp(p, "unix_seqpacket") == 0 ||
 					   strcmp(p, "u_seq") == 0) {
-					current_filter.dbs |= (1<<UNIX_SQ_DB);
+					filter_db_set(&dbs_filter, UNIX_SQ_DB);
 				} else if (strcmp(p, "packet") == 0) {
-					current_filter.dbs |= PACKET_DBM;
+					filter_db_set(&dbs_filter, PACKET_R_DB);
+					filter_db_set(&dbs_filter, PACKET_DG_DB);
 				} else if (strcmp(p, "packet_raw") == 0 ||
 					   strcmp(p, "p_raw") == 0) {
-					current_filter.dbs |= (1<<PACKET_R_DB);
+					filter_db_set(&dbs_filter, PACKET_R_DB);
 				} else if (strcmp(p, "packet_dgram") == 0 ||
 					   strcmp(p, "p_dgr") == 0) {
-					current_filter.dbs |= (1<<PACKET_DG_DB);
+					filter_db_set(&dbs_filter, PACKET_DG_DB);
 				} else if (strcmp(p, "netlink") == 0) {
-					current_filter.dbs |= (1<<NETLINK_DB);
+					filter_db_set(&dbs_filter, NETLINK_DB);
 				} else {
 					fprintf(stderr, "ss: \"%s\" is illegal socket table id\n", p);
 					usage();
@@ -3371,57 +3503,6 @@ int main(int argc, char *argv[])
 			exit(0);
 	}
 
-	if (do_default)
-		current_filter.dbs = default_filter.dbs;
-
-	if (preferred_family == AF_UNSPEC) {
-		if (!(current_filter.dbs&~UNIX_DBM))
-			preferred_family = AF_UNIX;
-		else if (!(current_filter.dbs&~PACKET_DBM))
-			preferred_family = AF_PACKET;
-		else if (!(current_filter.dbs&~(1<<NETLINK_DB)))
-			preferred_family = AF_NETLINK;
-	}
-
-	if (preferred_family != AF_UNSPEC) {
-		int mask2;
-		if (preferred_family == AF_INET ||
-		    preferred_family == AF_INET6) {
-			mask2= current_filter.dbs;
-		} else if (preferred_family == AF_PACKET) {
-			mask2 = PACKET_DBM;
-		} else if (preferred_family == AF_UNIX) {
-			mask2 = UNIX_DBM;
-		} else if (preferred_family == AF_NETLINK) {
-			mask2 = (1<<NETLINK_DB);
-		} else {
-			mask2 = 0;
-		}
-
-		if (do_default)
-			current_filter.dbs = mask2;
-		else
-			current_filter.dbs &= mask2;
-		current_filter.families = (1<<preferred_family);
-	} else {
-		if (!do_default)
-			current_filter.families = ~0;
-		else
-			current_filter.families = default_filter.families;
-	}
-	if (current_filter.dbs == 0) {
-		fprintf(stderr, "ss: no socket tables to show with such filter.\n");
-		exit(0);
-	}
-	if (current_filter.families == 0) {
-		fprintf(stderr, "ss: no families to show with such filter.\n");
-		exit(0);
-	}
-
-	if (resolve_services && resolve_hosts &&
-	    (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB))))
-		init_service_resolver();
-
 	/* Now parse filter... */
 	if (argc == 0 && filter_fp) {
 		if (ssfilter_parse(&current_filter.f, 0, NULL, filter_fp))
@@ -3432,15 +3513,15 @@ int main(int argc, char *argv[])
 		if (strcmp(*argv, "state") == 0) {
 			NEXT_ARG();
 			if (!saw_states)
-				current_filter.states = 0;
-			current_filter.states |= scan_state(*argv);
+				state_filter.states = 0;
+			state_filter.states |= scan_state(*argv);
 			saw_states = 1;
 		} else if (strcmp(*argv, "exclude") == 0 ||
 			   strcmp(*argv, "excl") == 0) {
 			NEXT_ARG();
 			if (!saw_states)
-				current_filter.states = SS_ALL;
-			current_filter.states &= ~scan_state(*argv);
+				state_filter.states = SS_ALL;
+			state_filter.states &= ~scan_state(*argv);
 			saw_states = 1;
 		} else {
 			if (ssfilter_parse(&current_filter.f, argc, argv, filter_fp))
@@ -3450,6 +3531,30 @@ int main(int argc, char *argv[])
 		argc--; argv++;
 	}
 
+	if (do_default) {
+		current_filter.dbs = default_filter.dbs;
+		current_filter.families = default_filter.families;
+	} else {
+		filter_merge(&current_filter, &dbs_filter);
+	}
+
+	state_filter.dbs      = current_filter.dbs;
+	state_filter.families = current_filter.families;
+	filter_merge(&current_filter, &state_filter);
+
+	if (resolve_services && resolve_hosts &&
+	    (current_filter.dbs&(UNIX_DBM|(1<<TCP_DB)|(1<<UDP_DB)|(1<<DCCP_DB))))
+		init_service_resolver();
+
+
+	if (current_filter.dbs == 0) {
+		fprintf(stderr, "ss: no socket tables to show with such filter.\n");
+		exit(0);
+	}
+	if (current_filter.families == 0) {
+		fprintf(stderr, "ss: no families to show with such filter.\n");
+		exit(0);
+	}
 	if (current_filter.states == 0) {
 		fprintf(stderr, "ss: no socket states to show with such filter.\n");
 		exit(0);
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2 2/3] ss: Unify packet stats output from netlink and proc
From: Vadim Kochan @ 2015-01-03  0:44 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan
In-Reply-To: <1420245877-11763-1-git-send-email-vadim4j@gmail.com>

From: Vadim Kochan <vadim4j@gmail.com>

Refactored to use one func to output packet stats info
from both /proc and netlink.

Added the possibility to get packet stats info from /proc
by setting the environment variable PROC_ROOT or PROC_NET_PACKET.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 misc/ss.c | 215 +++++++++++++++++++++++++++++---------------------------------
 1 file changed, 100 insertions(+), 115 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index a28ca4a..c68af19 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -2535,70 +2535,103 @@ static int unix_show(struct filter *f)
 	return 0;
 }
 
-static int packet_show_sock(const struct sockaddr_nl *addr,
-		struct nlmsghdr *nlh, void *arg)
-{
-	struct packet_diag_msg *r = NLMSG_DATA(nlh);
-	struct rtattr *tb[PACKET_DIAG_MAX+1];
-	__u32 rq;
+struct pktstat {
+	int type;
+	int prot;
+	int iface;
+	int state;
+	int rq;
+	int uid;
+	int ino;
+};
 
-	parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr*)(r+1),
-		     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+static int packet_stats_print(struct pktstat *s, const struct filter *f)
+{
+	char *buf = NULL;
 
-	/* use /proc/net/packet if all info are not available */
-	if (!tb[PACKET_DIAG_MEMINFO])
-		return -1;
+	if (f->f) {
+		struct tcpstat tst;
+		tst.local.family = AF_PACKET;
+		tst.remote.family = AF_PACKET;
+		tst.rport = 0;
+		tst.lport = s->iface;
+		tst.local.data[0] = s->prot;
+		tst.remote.data[0] = 0;
+		if (run_ssfilter(f->f, &tst) == 0)
+			return 1;
+	}
 
 	if (netid_width)
 		printf("%-*s ", netid_width,
-				r->pdiag_type == SOCK_RAW ? "p_raw" : "p_dgr");
+				s->type == SOCK_RAW ? "p_raw" : "p_dgr");
 	if (state_width)
 		printf("%-*s ", state_width, "UNCONN");
 
-	if (tb[PACKET_DIAG_MEMINFO]) {
-		__u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]);
-
-		rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC];
-	} else
-		rq = 0;
-	printf("%-6d %-6d ", rq, 0);
-
-	if (r->pdiag_num == 3) {
+	printf("%-6d %-6d ", s->rq, 0);
+	if (s->prot == 3) {
 		printf("%*s:", addr_width, "*");
 	} else {
-		char tb2[16];
+		char tb[16];
 		printf("%*s:", addr_width,
-		       ll_proto_n2a(htons(r->pdiag_num), tb2, sizeof(tb2)));
+				ll_proto_n2a(htons(s->prot), tb, sizeof(tb)));
 	}
-	if (tb[PACKET_DIAG_INFO]) {
-		struct packet_diag_info *pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]);
-
-		if (pinfo->pdi_index == 0)
-			printf("%-*s ", serv_width, "*");
-		else
-			printf("%-*s ", serv_width, xll_index_to_name(pinfo->pdi_index));
-	} else
+	if (s->iface == 0) {
 		printf("%-*s ", serv_width, "*");
+	} else {
+		printf("%-*s ", serv_width, xll_index_to_name(s->iface));
+	}
 
-	printf("%*s*%-*s",
-	       addr_width, "", serv_width, "");
-
-	char *buf = NULL;
+	printf("%*s*%-*s", addr_width, "", serv_width, "");
 
 	if (show_proc_ctx || show_sock_ctx) {
-		if (find_entry(r->pdiag_ino, &buf,
-				(show_proc_ctx & show_sock_ctx) ?
-				PROC_SOCK_CTX : PROC_CTX) > 0) {
+		if (find_entry(s->ino, &buf,
+					(show_proc_ctx & show_sock_ctx) ?
+					PROC_SOCK_CTX : PROC_CTX) > 0) {
 			printf(" users:(%s)", buf);
 			free(buf);
 		}
 	} else if (show_users) {
-		if (find_entry(r->pdiag_ino, &buf, USERS) > 0) {
+		if (find_entry(s->ino, &buf, USERS) > 0) {
 			printf(" users:(%s)", buf);
 			free(buf);
 		}
 	}
 
+	return 0;
+}
+
+static int packet_show_sock(const struct sockaddr_nl *addr,
+		struct nlmsghdr *nlh, void *arg)
+{
+	const struct filter *f = arg;
+	struct packet_diag_msg *r = NLMSG_DATA(nlh);
+	struct rtattr *tb[PACKET_DIAG_MAX+1];
+	struct pktstat stat = {};
+
+	parse_rtattr(tb, PACKET_DIAG_MAX, (struct rtattr*)(r+1),
+		     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
+
+	/* use /proc/net/packet if all info are not available */
+	if (!tb[PACKET_DIAG_MEMINFO])
+		return -1;
+
+	stat.type = r->pdiag_type;
+	stat.prot = r->pdiag_num;
+	stat.ino = r->pdiag_ino;
+
+	if (tb[PACKET_DIAG_MEMINFO]) {
+		__u32 *skmeminfo = RTA_DATA(tb[PACKET_DIAG_MEMINFO]);
+		stat.rq = skmeminfo[SK_MEMINFO_RMEM_ALLOC];
+	}
+
+	if (tb[PACKET_DIAG_INFO]) {
+		struct packet_diag_info *pinfo = RTA_DATA(tb[PACKET_DIAG_INFO]);
+		stat.iface = pinfo->pdi_index;
+	}
+
+	if (packet_stats_print(&stat, f))
+		return 0;
+
 	if (show_details) {
 		__u32 uid = 0;
 
@@ -2639,94 +2672,46 @@ static int packet_show_netlink(struct filter *f)
 	return handle_netlink_request(f, &req.nlh, sizeof(req), packet_show_sock);
 }
 
+static int packet_show_line(char *buf, const struct filter *f, int fam)
+{
+	unsigned long long sk;
+	struct pktstat stat = {};
+
+	sscanf(buf, "%llx %*d %d %x %d %d %u %u %u",
+			&sk,
+			&stat.type, &stat.prot, &stat.iface, &stat.state,
+			&stat.rq, &stat.uid, &stat.ino);
+
+	if (stat.type == SOCK_RAW && !(f->dbs&(1<<PACKET_R_DB)))
+		return 0;
+	if (stat.type == SOCK_DGRAM && !(f->dbs&(1<<PACKET_DG_DB)))
+		return 0;
+
+	if (packet_stats_print(&stat, f))
+		return 0;
+
+	if (show_details) {
+		printf(" ino=%u uid=%u sk=%llx", stat.ino, stat.uid, sk);
+	}
+	printf("\n");
+	return 0;
+}
 
 static int packet_show(struct filter *f)
 {
 	FILE *fp;
-	char buf[256];
-	int type;
-	int prot;
-	int iface;
-	int state;
-	int rq;
-	int uid;
-	int ino;
-	unsigned long long sk;
 
 	if (preferred_family != AF_PACKET && !(f->states & (1 << SS_CLOSE)))
 		return 0;
 
-	if (packet_show_netlink(f) == 0)
+	if (!getenv("PROC_NET_PACKET") && !getenv("PROC_ROOT") &&
+			packet_show_netlink(f) == 0)
 		return 0;
 
 	if ((fp = net_packet_open()) == NULL)
 		return -1;
-	fgets(buf, sizeof(buf)-1, fp);
-
-	while (fgets(buf, sizeof(buf)-1, fp)) {
-		sscanf(buf, "%llx %*d %d %x %d %d %u %u %u",
-		       &sk,
-		       &type, &prot, &iface, &state,
-		       &rq, &uid, &ino);
-
-		if (type == SOCK_RAW && !(f->dbs&(1<<PACKET_R_DB)))
-			continue;
-		if (type == SOCK_DGRAM && !(f->dbs&(1<<PACKET_DG_DB)))
-			continue;
-		if (f->f) {
-			struct tcpstat tst;
-			tst.local.family = AF_PACKET;
-			tst.remote.family = AF_PACKET;
-			tst.rport = 0;
-			tst.lport = iface;
-			tst.local.data[0] = prot;
-			tst.remote.data[0] = 0;
-			if (run_ssfilter(f->f, &tst) == 0)
-				continue;
-		}
-
-		if (netid_width)
-			printf("%-*s ", netid_width,
-			       type == SOCK_RAW ? "p_raw" : "p_dgr");
-		if (state_width)
-			printf("%-*s ", state_width, "UNCONN");
-		printf("%-6d %-6d ", rq, 0);
-		if (prot == 3) {
-			printf("%*s:", addr_width, "*");
-		} else {
-			char tb[16];
-			printf("%*s:", addr_width,
-			       ll_proto_n2a(htons(prot), tb, sizeof(tb)));
-		}
-		if (iface == 0) {
-			printf("%-*s ", serv_width, "*");
-		} else {
-			printf("%-*s ", serv_width, xll_index_to_name(iface));
-		}
-		printf("%*s*%-*s",
-		       addr_width, "", serv_width, "");
-
-		char *buf = NULL;
-
-		if (show_proc_ctx || show_sock_ctx) {
-			if (find_entry(ino, &buf,
-					(show_proc_ctx & show_sock_ctx) ?
-					PROC_SOCK_CTX : PROC_CTX) > 0) {
-				printf(" users:(%s)", buf);
-				free(buf);
-			}
-		} else if (show_users) {
-			if (find_entry(ino, &buf, USERS) > 0) {
-				printf(" users:(%s)", buf);
-				free(buf);
-			}
-		}
-
-		if (show_details) {
-			printf(" ino=%u uid=%u sk=%llx", ino, uid, sk);
-		}
-		printf("\n");
-	}
+	if (generic_record_read(fp, packet_show_line, f, AF_PACKET))
+		return -1;
 
 	return 0;
 }
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2 1/3] ss: Unify unix stats output from netlink and proc
From: Vadim Kochan @ 2015-01-03  0:44 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan
In-Reply-To: <1420245877-11763-1-git-send-email-vadim4j@gmail.com>

From: Vadim Kochan <vadim4j@gmail.com>

Refactored to use one func to output unix stats info
from both /proc and netlink.

Signed-off-by: Vadim Kochan <vadim4j@gmail.com>
---
 misc/ss.c | 98 +++++++++++++++++++++++----------------------------------------
 1 file changed, 36 insertions(+), 62 deletions(-)

diff --git a/misc/ss.c b/misc/ss.c
index f0c7b34..a28ca4a 100644
--- a/misc/ss.c
+++ b/misc/ss.c
@@ -2234,6 +2234,7 @@ struct unixstat
 	struct unixstat *next;
 	int ino;
 	int peer;
+	char *peer_name;
 	int rq;
 	int wq;
 	int state;
@@ -2279,7 +2280,18 @@ static const char *unix_netid_name(int type)
 	return netid;
 }
 
-static void unix_list_print(struct unixstat *list, struct filter *f)
+static int unix_type_skip(struct unixstat *s, struct filter *f)
+{
+	if (s->type == SOCK_STREAM && !(f->dbs&(1<<UNIX_ST_DB)))
+		return 1;
+	if (s->type == SOCK_DGRAM && !(f->dbs&(1<<UNIX_DG_DB)))
+		return 1;
+	if (s->type == SOCK_SEQPACKET && !(f->dbs&(1<<UNIX_SQ_DB)))
+		return 1;
+	return 0;
+}
+
+static void unix_stats_print(struct unixstat *list, struct filter *f)
 {
 	struct unixstat *s;
 	char *peer;
@@ -2287,15 +2299,14 @@ static void unix_list_print(struct unixstat *list, struct filter *f)
 	for (s = list; s; s = s->next) {
 		if (!(f->states & (1<<s->state)))
 			continue;
-		if (s->type == SOCK_STREAM && !(f->dbs&(1<<UNIX_ST_DB)))
-			continue;
-		if (s->type == SOCK_DGRAM && !(f->dbs&(1<<UNIX_DG_DB)))
-			continue;
-		if (s->type == SOCK_SEQPACKET && !(f->dbs&(1<<UNIX_SQ_DB)))
+		if (unix_type_skip(s, f))
 			continue;
 
 		peer = "*";
-		if (s->peer) {
+		if (s->peer_name)
+			peer = s->peer_name;
+
+		if (s->peer && !s->peer_name) {
 			struct unixstat *p;
 			for (p = list; p; p = p->next) {
 				if (s->peer == p->ino)
@@ -2356,36 +2367,23 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
 	struct unix_diag_msg *r = NLMSG_DATA(nlh);
 	struct rtattr *tb[UNIX_DIAG_MAX+1];
 	char name[128];
-	int peer_ino;
-	__u32 rqlen, wqlen;
+	struct unixstat stat = { .name = "*" , .peer_name = "*" };
 
 	parse_rtattr(tb, UNIX_DIAG_MAX, (struct rtattr*)(r+1),
 		     nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));
 
-	if (r->udiag_type == SOCK_STREAM && !(f->dbs&(1<<UNIX_ST_DB)))
-		return 0;
-	if (r->udiag_type == SOCK_DGRAM && !(f->dbs&(1<<UNIX_DG_DB)))
-		return 0;
-	if (r->udiag_type == SOCK_SEQPACKET && !(f->dbs&(1<<UNIX_SQ_DB)))
-		return 0;
+	stat.type  = r->udiag_type;
+	stat.state = r->udiag_state;
+	stat.ino   = r->udiag_ino;
 
-	if (netid_width)
-		printf("%-*s ", netid_width,
-		       unix_netid_name(r->udiag_type));
-	if (state_width)
-		printf("%-*s ", state_width, sstate_name[r->udiag_state]);
+	if (unix_type_skip(&stat, f))
+		return 0;
 
 	if (tb[UNIX_DIAG_RQLEN]) {
 		struct unix_diag_rqlen *rql = RTA_DATA(tb[UNIX_DIAG_RQLEN]);
-		rqlen = rql->udiag_rqueue;
-		wqlen = rql->udiag_wqueue;
-	} else {
-		rqlen = 0;
-		wqlen = 0;
+		stat.rq = rql->udiag_rqueue;
+		stat.wq = rql->udiag_wqueue;
 	}
-
-	printf("%-6u %-6u ", rqlen, wqlen);
-
 	if (tb[UNIX_DIAG_NAME]) {
 		int len = RTA_PAYLOAD(tb[UNIX_DIAG_NAME]);
 
@@ -2393,41 +2391,17 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
 		name[len] = '\0';
 		if (name[0] == '\0')
 			name[0] = '@';
-	} else
-		sprintf(name, "*");
-
+		stat.name = &name[0];
+	}
 	if (tb[UNIX_DIAG_PEER])
-		peer_ino = rta_getattr_u32(tb[UNIX_DIAG_PEER]);
-	else
-		peer_ino = 0;
-
-	printf("%*s %-*d %*s %-*d",
-			addr_width, name,
-			serv_width, r->udiag_ino,
-			addr_width, "*", /* FIXME */
-			serv_width, peer_ino);
-
-	char *buf = NULL;
+		stat.peer = rta_getattr_u32(tb[UNIX_DIAG_PEER]);
 
-	if (show_proc_ctx || show_sock_ctx) {
-		if (find_entry(r->udiag_ino, &buf,
-				(show_proc_ctx & show_sock_ctx) ?
-				PROC_SOCK_CTX : PROC_CTX) > 0) {
-			printf(" users:(%s)", buf);
-			free(buf);
-		}
-	} else if (show_users) {
-		if (find_entry(r->udiag_ino, &buf, USERS) > 0) {
-			printf(" users:(%s)", buf);
-			free(buf);
-		}
-	}
+	unix_stats_print(&stat, f);
 
 	if (show_mem) {
-		printf("\n\t");
+		printf("\t");
 		print_skmeminfo(tb, UNIX_DIAG_MEMINFO);
 	}
-
 	if (show_details) {
 		if (tb[UNIX_DIAG_SHUTDOWN]) {
 			unsigned char mask;
@@ -2435,9 +2409,8 @@ static int unix_show_sock(const struct sockaddr_nl *addr, struct nlmsghdr *nlh,
 			printf(" %c-%c", mask & 1 ? '-' : '<', mask & 2 ? '-' : '>');
 		}
 	}
-
-	printf("\n");
-
+	if (show_mem || show_details)
+		printf("\n");
 	return 0;
 }
 
@@ -2505,6 +2478,7 @@ static int unix_show(struct filter *f)
 		if (!(u = malloc(sizeof(*u))))
 			break;
 		u->name = NULL;
+		u->peer_name = NULL;
 
 		if (sscanf(buf, "%x: %x %x %x %x %x %d %s",
 			   &u->peer, &u->rq, &u->wq, &flags, &u->type,
@@ -2544,7 +2518,7 @@ static int unix_show(struct filter *f)
 			strcpy(u->name, name);
 		}
 		if (++cnt > MAX_UNIX_REMEMBER) {
-			unix_list_print(list, f);
+			unix_stats_print(list, f);
 			unix_list_free(list);
 			list = NULL;
 			cnt = 0;
@@ -2552,7 +2526,7 @@ static int unix_show(struct filter *f)
 	}
 	fclose(fp);
 	if (list) {
-		unix_list_print(list, f);
+		unix_stats_print(list, f);
 		unix_list_free(list);
 		list = NULL;
 		cnt = 0;
-- 
2.1.3

^ permalink raw reply related

* [PATCH iproute2 0/3] ss: Fix sockets filtering
From: Vadim Kochan @ 2015-01-03  0:44 UTC (permalink / raw)
  To: netdev; +Cc: Vadim Kochan

From: Vadim Kochan <vadim4j@gmail.com>

This series contains refactoring & fixes related to
sockets filtering.

The first 2 patches are mostly refactoring, as they just
allow one func to be used to output UNIX & PACKET socket stats from both Netlink &
/proc.

The last one has a big change related to the way filtering options
are combined with each other. This change also has fixes for some combinations
of filtering options.

I did some basic testing *BUT* I can't guarantee that there are no bugs ...
Here is my testing list, compared against the 'master' version:

Some test results with comparing version in PATCH and version from master
-------------------------------------------------------------------------

this   - ss version sending in PATCH
master - ss version in master

Case #1: Show only IPv4 sockets
    $ ss -4

    RESULTS
        this    -  shows only IPv4 sockets with established states     [OK]
        master  -  shows IPv4 and UNIX sockets with established states [FAIL]

Case #2: Show only IPv4 sockets with all states
    $ ss -4 -a

    RESULTS
        this    -  shows only IPv4 sockets with all states [OK]
        master  -  shows ALL sockets kinds with all states [FAIL]

Case #3: Show only IPv4 sockets with listen states (closed or listening)
    $ ss -4 -l

    RESULTS
        this    -  shows only IPv4 sockets with listen states    [OK]
        master  -  shows ALL sockets kinds with LISTENING states [FAIL]

Case #4: Show only IPv4 UDP sockets
    $ ss -4 -u

    RESULTS
        this    -  shows only IPv4 UDP sockets                                 [OK]
        master  -  shows IPv4 UDP sockets but only if state 'closed' specified [FAIL]
    
    In 'this' version there is no need to additionally set the 'closed' state for UDP sockets, as it is set
    automatically because we explicitly specified UDP sockets.

Case #5: Show all UDP sockets
    $ ss -u

    RESULTS
        this    -  shows all UDP sockets for both IPv4/IPv6 protocol families  [OK]
        master  -  shows IPv4 UDP sockets but only if state 'closed' specified [FAIL]

Case #6: Show all UDP sockets for IPv6 only protocol
    $ ss -u -6

    RESULTS
        this    -  shows all IPv6 UDP sockets                                  [OK]
        master  -  shows IPv4 UDP sockets but only if state 'closed' specified [FAIL]


Case #7: Show UNIX sockets with matches "*X11*" as src
    $ ss src unix:*X11*

    RESULTS
        this    -  shows only UNIX sockets with established states with src matches "*X11*" [OK]
        master  -  shows all established UNIX sockets with "RTNETLINK" errors               [FAIL]


Case #8: Show UNIX sockets with matches "*X11*" as src but with LISTENING states only
    $ ss src unix:*X11* -l

    RESULTS
        this    -  shows only UNIX sockets with LISTENING states with src matches "*X11*" [OK]
        master  -  Segmentation error                                                     [FAIL]

Case #9: Show all RAW sockets
    $ ss -w

    RESULTS
        this    - shows all RAW sockets for IPv4/IPv6 families                                  [OK]
        master  - shows all RAW sockets for IPv4/IPv6 families only if state 'closed' specified [FAIL]

Case #10: Show all TCP/UDP sockets
    $ ss -t -u

    RESULTS
        this    - shows all established TCP and unconnected UDP sockets for IPv4/IPv6 protocols. [OK]
        master  - shows only TCP established sockets, shows UDP                                  [FAIL]

Vadim Kochan (3):
  ss: Unify unix stats output from netlink and proc
  ss: Unify packet stats output from netlink and proc
  ss: Filtering logic changing, with fixes

 misc/ss.c | 628 ++++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 346 insertions(+), 282 deletions(-)

-- 
2.1.3

^ permalink raw reply

* Re: [PATCH] GMAC: fix simple_return.cocci warnings
From: Joe Perches @ 2015-01-03  0:46 UTC (permalink / raw)
  To: kbuild test robot
  Cc: Roger Chen, kbuild-all, Giuseppe Cavallaro, netdev, linux-kernel
In-Reply-To: <20150103002526.GA15863@waimea.lkp.intel.com>

On Sat, 2015-01-03 at 08:25 +0800, kbuild test robot wrote:
> drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c:425:1-4: WARNING: end returns can be simpified
> 
>  Simplify a trivial if-return sequence.  Possibly combine with a
>  preceding function call.
> Generated by: scripts/coccinelle/misc/simple_return.cocci
> 
> CC: Roger Chen <roger.chen@rock-chips.com>
> Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
> ---
> 
>  dwmac-rk.c |    6 +-----
>  1 file changed, 1 insertion(+), 5 deletions(-)
> 
> --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
> +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
> @@ -422,11 +422,7 @@ static int rk_gmac_init(struct platform_
>  	if (ret)
>  		return ret;
>  
> -	ret = gmac_clk_enable(bsp_priv, true);
> -	if (ret)
> -		return ret;
> -
> -	return 0;
> +	return gmac_clk_enable(bsp_priv, true);

I think this change is not particularly better.

When the pattern is multiply repeated like:

{
	...
	foo = bar();
	if (foo)
		return foo;

	foo = baz();
	if (foo)
		return foo;

	foo = qux();
	if (foo)
		return foo;

	return 0;
}

I think it's better to not change the last
test in the sequence just to minimize overall
line count.

^ permalink raw reply

* [PATCH] GMAC: fix simple_return.cocci warnings
From: kbuild test robot @ 2015-01-03  0:25 UTC (permalink / raw)
  To: Roger Chen; +Cc: kbuild-all, Giuseppe Cavallaro, netdev, linux-kernel
In-Reply-To: <201501030822.7cG5bXrm%fengguang.wu@intel.com>

drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c:425:1-4: WARNING: end returns can be simpified

 Simplify a trivial if-return sequence.  Possibly combine with a
 preceding function call.
Generated by: scripts/coccinelle/misc/simple_return.cocci

CC: Roger Chen <roger.chen@rock-chips.com>
Signed-off-by: Fengguang Wu <fengguang.wu@intel.com>
---

 dwmac-rk.c |    6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -422,11 +422,7 @@ static int rk_gmac_init(struct platform_
 	if (ret)
 		return ret;
 
-	ret = gmac_clk_enable(bsp_priv, true);
-	if (ret)
-		return ret;
-
-	return 0;
+	return gmac_clk_enable(bsp_priv, true);
 }
 
 static void rk_gmac_exit(struct platform_device *pdev, void *priv)

^ permalink raw reply

* Re: [PATCH] net: ethernet: cpsw: fix hangs with interrupts
From: Tony Lindgren @ 2015-01-03  0:08 UTC (permalink / raw)
  To: Felipe Balbi
  Cc: David Miller, Mugunthan V N, Yegor Yefremov,
	Linux OMAP Mailing List, netdev, stable
In-Reply-To: <1420236959-32444-1-git-send-email-balbi@ti.com>

* Felipe Balbi <balbi@ti.com> [150102 14:19]:
> The CPSW IP implements pulse-signaled interrupts. Due to
> that we must write a correct, pre-defined value to the
> CPDMA_MACEOIVECTOR register so the controller generates
> a pulse on the correct IRQ line to signal the End Of
> Interrupt.
> 
> The way the driver is written today, all four IRQ lines
> are requested using the same IRQ handler and, because of
> that, we could fall into situations where a TX IRQ fires
> but we tell the controller that we ended an RX IRQ (or
> vice-versa). This situation triggers an IRQ storm on the
> reserved IRQ 127 of INTC which will in turn call ack_bad_irq()
> which will, then, print a ton of:
> 
> 	unexpected IRQ trap at vector 00
> 
> In order to fix the problem, we are moving all calls to
> cpdma_ctlr_eoi() inside the IRQ handler and making sure
> we *always* write the correct value to the CPDMA_MACEOIVECTOR
> register. Note that the algorithm assumes that IRQ numbers and
> value-to-be-written-to-EOI are proportional, meaning that a
> write of value 0 would trigger an EOI pulse for the RX_THRESHOLD
> Interrupt and that's the IRQ number sitting in the 0-th index
> of our irqs_table array.
> 
> This, however, is safe at least for current implementations of
> CPSW so we will refrain from making the check smarter (and, as
> a side-effect, slower) until we actually have a platform where
> IRQ lines are swapped.
> 
> This patch has been tested for several days with AM335x- and
> AM437x-based platforms. AM57x was left out because there are
> still pending patches to enable ethernet in mainline for that
> platform. A read of the TRM confirms the statement on previous
> paragraph.
> 
> Reported-by: Yegor Yefremov <yegorslists@googlemail.com>
> Fixes: 510a1e7 (drivers: net: davinci_cpdma: acknowledge interrupt properly)
> Cc: <stable@vger.kernel.org> # v3.9+
> Signed-off-by: Felipe Balbi <balbi@ti.com>

Makes sense to me. I've seen similar EOI handling issue with
davinci-emac recently that I'll post some fixes for soonish.
It seems the EOI registers just gate the interrupt lines at the
device end without affecting the interrupt status, so:

Acked-by: Tony Lindgren <tony@atomide.com>

> ---
> 
> should be applied on 'net' for current -rc
> 
>  drivers/net/ethernet/ti/cpsw.c | 19 ++++++++-----------
>  1 file changed, 8 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
> index c560f9a..e61ee83 100644
> --- a/drivers/net/ethernet/ti/cpsw.c
> +++ b/drivers/net/ethernet/ti/cpsw.c
> @@ -757,6 +757,14 @@ requeue:
>  static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
>  {
>  	struct cpsw_priv *priv = dev_id;
> +	int value = irq - priv->irqs_table[0];
> +
> +	/* NOTICE: Ending IRQ here. The trick with the 'value' variable above
> +	 * is to make sure we will always write the correct value to the EOI
> +	 * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2
> +	 * for TX Interrupt and 3 for MISC Interrupt.
> +	 */
> +	cpdma_ctlr_eoi(priv->dma, value);
>  
>  	cpsw_intr_disable(priv);
>  	if (priv->irq_enabled == true) {
> @@ -786,8 +794,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget)
>  	int			num_tx, num_rx;
>  
>  	num_tx = cpdma_chan_process(priv->txch, 128);
> -	if (num_tx)
> -		cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
>  
>  	num_rx = cpdma_chan_process(priv->rxch, budget);
>  	if (num_rx < budget) {
> @@ -795,7 +801,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget)
>  
>  		napi_complete(napi);
>  		cpsw_intr_enable(priv);
> -		cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
>  		prim_cpsw = cpsw_get_slave_priv(priv, 0);
>  		if (prim_cpsw->irq_enabled == false) {
>  			prim_cpsw->irq_enabled = true;
> @@ -1310,8 +1315,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
>  	napi_enable(&priv->napi);
>  	cpdma_ctlr_start(priv->dma);
>  	cpsw_intr_enable(priv);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
>  
>  	prim_cpsw = cpsw_get_slave_priv(priv, 0);
>  	if (prim_cpsw->irq_enabled == false) {
> @@ -1578,9 +1581,6 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
>  	cpdma_chan_start(priv->txch);
>  	cpdma_ctlr_int_ctrl(priv->dma, true);
>  	cpsw_intr_enable(priv);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
> -
>  }
>  
>  static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
> @@ -1620,9 +1620,6 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
>  	cpsw_interrupt(ndev->irq, priv);
>  	cpdma_ctlr_int_ctrl(priv->dma, true);
>  	cpsw_intr_enable(priv);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
> -	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
> -
>  }
>  #endif
>  
> -- 
> 2.2.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: linux-next: build failure after merge of the net-next tree
From: Stephen Rothwell @ 2015-01-02 23:32 UTC (permalink / raw)
  To: Sedat Dilek
  Cc: David Miller, netdev@vger.kernel.org, linux-next, LKML,
	Richard Cochran, Jeff Kirsher
In-Reply-To: <CA+icZUUmm5=YAZCXtk+iBaewFremZ17M2uShx+jBa4=JcTuaQA@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 1649 bytes --]

Hi Sedat,

On Fri, 2 Jan 2015 23:21:05 +0100 Sedat Dilek <sedat.dilek@gmail.com> wrote:
>
> On Fri, Jan 2, 2015 at 11:11 PM, Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> >
> > After merging the net-next tree, today's linux-next build (powerpc
> > ppc64_defconfig) failed like this:
> >
> > drivers/net/ethernet/mellanox/mlx4/en_clock.c: In function 'mlx4_en_init_timestamp':
> > drivers/net/ethernet/mellanox/mlx4/en_clock.c:249:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
> >   mdev->cycles.mask = CLOCKSOURCE_MASK(48);
> >   ^
> > drivers/net/ethernet/mellanox/mlx4/en_clock.c:257:3: error: implicit declaration of function 'clocksource_khz2mult' [-Werror=implicit-function-declaration]
> >    clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
> >    ^
> > drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c: In function 'ixgbe_ptp_start_cyclecounter':
> > drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c:796:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
> >   adapter->cc.mask = CLOCKSOURCE_MASK(64);
> >   ^
> >
> > Presumably caused by commit 74d23cc704d1 ("time: move the
> > timecounter/cyclecounter code into its own file").
> 
> Just FYI...
> 
> Richard posted a new patch-series "[PATCH net-next 0/7] Fixing the
> "Time Counter fixes and improvements" on linux-nextdev fixing this.
> 
> - Sedat -
> 
> [1] https://lkml.org/lkml/2015/1/1/27

Thanks, so hopefully I won't need my patch for very long.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [PATCH net-next 1/3] net: add IPv4 routing FIB support for swdev
From: roopa @ 2015-01-02 22:57 UTC (permalink / raw)
  To: Arad, Ronen
  Cc: Scott Feldman, Netdev, Jirí Pírko, john fastabend,
	Thomas Graf, Jamal Hadi Salim, Andy Gospodarek
In-Reply-To: <E4CD12F19ABA0C4D8729E087A761DC3505DD315B@ORSMSX101.amr.corp.intel.com>

On 1/2/15, 3:39 AM, Arad, Ronen wrote:
>
>> -----Original Message-----
>> From: netdev-owner@vger.kernel.org [mailto:netdev-owner@vger.kernel.org] On
>> Behalf Of Scott Feldman
>> Sent: Friday, January 02, 2015 10:01 AM
>> To: roopa
>> Cc: Netdev; Jiří Pírko; john fastabend; Thomas Graf; Jamal Hadi Salim; Andy
>> Gospodarek
>> Subject: Re: [PATCH net-next 1/3] net: add IPv4 routing FIB support for swdev
>>
>> On Thu, Jan 1, 2015 at 9:49 PM, roopa <roopa@cumulusnetworks.com> wrote:
>>> On 1/1/15, 7:29 PM, sfeldma@gmail.com wrote:
>>>> From: Scott Feldman <sfeldma@gmail.com>
>>>>
>>>> To offload IPv4 L3 routing functions to swdev device, the swdev device
>>>> driver
>>>> implements two new ndo ops (ndo_switch_fib_ipv4_add/del).  The ops are
>>>> called
>>>> by the core IPv4 FIB code when installing/removing FIB entries to/from the
>>>> kernel FIB.  On install, the driver should return 0 if FIB entry (route)
>>>> can be
>>>> installed to device for offloading, -EOPNOTSUPP if route cannot be
>>>> installed
>>>> due to device limitations, and other negative error code on failure to
>>>> install
>>>> route to device.  On failure error code, the route is not installed to
>>>> device,
>>>> and not installed in kernel FIB, and the return code is propagated back to
>>>> the
>>>> user-space caller (via netlink).  An -EOPNOTSUPP error code is skipped for
>>>> the
>>>> device but installed in the kernel FIB.
>>>>
>>>> The FIB entry (route) nexthop list is used to find the swdev device port
>>>> to
>>>> anchor the ndo op call.  The route's fib_dev (the first nexthop's dev) is
>>>> used to
>>>> find the swdev port by recursively traversing the fib_dev's lower_dev list
>>>> until a swdev port is found.  The ndo op is called on this swdev port.
>>>
>>> scott, I posted a similar api for bridge attribute sets. But, nobody
>>> supported it.
>>> http://marc.info/?l=linux-netdev&m=141820234410602&w=2
>>>
>>> If this is acceptable, I will be resubmitting my api as well.
>>>
>> This may get shot down as well, who knows?
>>
>> For routes, the nexthop dev may be a bridge or a bond for an IP on the
>> router, so we have no choice but to walk down from the bridge or the
>> bond to find a swport dev to call the ndo op to install the route.
>>
> Another case is when VLAN-aware bridge with VLAN filtering is used. In that
> case IP interfaces are VLAN interfaces created on top of the bridge.
>
>> For bridge settings, I remember someone raised the issue that settings
>> should be propagated down the dev hierarchy, with parent calling
>> child's op and so on.  I'll go back and look at your post.
>>
> This was my comment. I'm not sure it was correct. My concern was the VLAN
> interface on top of a VLAN-aware bridge use-case. I now believe that such
> interfaces are upper devices of the bridge (not master). Therefore, it seems
> that traversal starting at a VLAN interface on top of a bridge will follow a
> path: VLAN interface => bridge => [team/bond] => switchdev port.
For L3 this seems right. My patches were doing the same thing, but only
for L2 (the VLAN-filtering bridge), and only for bridge attributes
(learning, flooding, etc.), where the traversal looks like this:
bridge => [team/bond] => switchdev port.

> One complication here is that the VLAN context is important. A "naked" nexthop
> shall only be resolved within the VLAN associated with the VLAN interface. When
> ARP resolution is performed by Linux stack, it goes via the VLAN interface
> which imposes a tag on the packet before handing it to the bridge. The VLAN-
> aware bridge floods such packet only to member ports of the VLAN. This behavior
> of the software bridge has to be preserved with offloaded L3 forwarding and
> offloaded L2 switching.
>>>
>>>> Since the FIB entry is "naked" when pushed from the kernel, the
>>>> driver/device
>>>> is responsible for resolving the route's nexthops to neighbor MAC
>>>> addresses.
>>>> This can be done by the driver by monitoring NETEVENT_NEIGH_UPDATE
>>>> netevent notifier to watch for ARP activity.  Once a nexthop is resolved
>>>> to
>>>> neighbor MAC address, it can be installed to the device and the device
>>>> will
>>>> do the L3 routing offload in HW, for that nexthop.
>>>>
>>>> Signed-off-by: Scott Feldman <sfeldma@gmail.com>
>>>> Signed-off-by: Jiri Pirko <jiri@resnulli.us>
>>>> ---
>>>>    include/linux/netdevice.h |   22 +++++++++++
>>>>    include/net/switchdev.h   |   18 +++++++++
>>>>    net/ipv4/fib_trie.c       |   17 ++++++++-
>>>>    net/switchdev/switchdev.c |   89
>>>> +++++++++++++++++++++++++++++++++++++++++++++
>>>>    4 files changed, 145 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
>>>> index 679e6e9..b66d22b 100644
>>>> --- a/include/linux/netdevice.h
>>>> +++ b/include/linux/netdevice.h
>>>> @@ -767,6 +767,8 @@ struct netdev_phys_item_id {
>>>>    typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
>>>>                                         struct sk_buff *skb);
>>>>    +struct fib_info;
>>>> +
>>>>    /*
>>>>     * This structure defines the management hooks for network devices.
>>>>     * The following hooks can be defined; unless noted otherwise, they are
>>>> @@ -1030,6 +1032,14 @@ typedef u16 (*select_queue_fallback_t)(struct
>>>> net_device *dev,
>>>>     * int (*ndo_switch_port_stp_update)(struct net_device *dev, u8 state);
>>>>     *    Called to notify switch device port of bridge port STP
>>>>     *    state change.
>>>> + * int (*ndo_sw_parent_fib_ipv4_add)(struct net_device *dev, __be32 dst,
>>>> + *                                  int dst_len, struct fib_info *fi,
>>>> + *                                  u8 tos, u8 type, u32 tb_id);
>>>> + *     Called to add IPv4 route to switch device.
>>>> + * int (*ndo_sw_parent_fib_ipv4_del)(struct net_device *dev, __be32 dst,
>>>> + *                                  int dst_len, struct fib_info *fi,
>>>> + *                                  u8 tos, u8 type, u32 tb_id);
>>>> + *     Called to delete IPv4 route from switch device.
>>>>     */
>>>>    struct net_device_ops {
>>>>          int                     (*ndo_init)(struct net_device *dev);
>>>> @@ -1189,6 +1199,18 @@ struct net_device_ops {
>>>>                                                              struct
>>>> netdev_phys_item_id *psid);
>>>>          int                     (*ndo_switch_port_stp_update)(struct
>>>> net_device *dev,
>>>>                                                                u8 state);
>>>> +       int                     (*ndo_switch_fib_ipv4_add)(struct
>>>> net_device *dev,
>>>> +                                                          __be32 dst,
>>>> +                                                          int dst_len,
>>>> +                                                          struct fib_info
>>>> *fi,
>>>> +                                                          u8 tos, u8
>>>> type,
>>>> +                                                          u32 tb_id);
>>>> +       int                     (*ndo_switch_fib_ipv4_del)(struct
>>>> net_device *dev,
>>>> +                                                          __be32 dst,
>>>> +                                                          int dst_len,
>>>> +                                                          struct fib_info
>>>> *fi,
>>>> +                                                          u8 tos, u8
>>>> type,
>>>> +                                                          u32 tb_id);
>>>>    #endif
>>>>    };
>>>>    diff --git a/include/net/switchdev.h b/include/net/switchdev.h
>>>> index 8a6d164..caebc2a 100644
>>>> --- a/include/net/switchdev.h
>>>> +++ b/include/net/switchdev.h
>>>> @@ -17,6 +17,10 @@
>>>>    int netdev_switch_parent_id_get(struct net_device *dev,
>>>>                                  struct netdev_phys_item_id *psid);
>>>>    int netdev_switch_port_stp_update(struct net_device *dev, u8 state);
>>>> +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
>>>> +                              u8 tos, u8 type, u32 tb_id);
>>>> +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
>>>> +                              u8 tos, u8 type, u32 tb_id);
>>>>      #else
>>>>    @@ -32,6 +36,20 @@ static inline int
>>>> netdev_switch_port_stp_update(struct net_device *dev,
>>>>          return -EOPNOTSUPP;
>>>>    }
>>>>    +static inline int netdev_switch_fib_ipv4_add(u32 dst, int dst_len,
>>>> +                                            struct fib_info *fi,
>>>> +                                            u8 tos, u8 type, u32 tb_id)
>>>> +{
>>>> +       return -EOPNOTSUPP;
>>>> +}
>>>> +
>>>> +static inline int netdev_switch_fib_ipv4_del(u32 dst, int dst_len,
>>>> +                                            struct fib_info *fi,
>>>> +                                            u8 tos, u8 type, u32 tb_id)
>>>> +{
>>>> +       return -EOPNOTSUPP;
>>>> +}
>>>> +
>>>>    #endif
>>>>      #endif /* _LINUX_SWITCHDEV_H_ */
>>>> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
>>>> index 281e5e0..ea2dc17 100644
>>>> --- a/net/ipv4/fib_trie.c
>>>> +++ b/net/ipv4/fib_trie.c
>>>> @@ -79,6 +79,7 @@
>>>>    #include <net/tcp.h>
>>>>    #include <net/sock.h>
>>>>    #include <net/ip_fib.h>
>>>> +#include <net/switchdev.h>
>>>>    #include "fib_lookup.h"
>>>>      #define MAX_STAT_DEPTH 32
>>>> @@ -1201,6 +1202,8 @@ int fib_table_insert(struct fib_table *tb, struct
>>>> fib_config *cfg)
>>>>                          fib_release_info(fi_drop);
>>>>                          if (state & FA_S_ACCESSED)
>>>>                                  rt_cache_flush(cfg->fc_nlinfo.nl_net);
>>>> +                       netdev_switch_fib_ipv4_add(key, plen, fi,
>>>> fa->fa_tos,
>>>> +                                                  cfg->fc_type,
>>>> tb->tb_id);
>>>>                          rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
>>>>                                  tb->tb_id, &cfg->fc_nlinfo,
>>>> NLM_F_REPLACE);
>>>>    @@ -1229,6 +1232,13 @@ int fib_table_insert(struct fib_table *tb, struct
>>>> fib_config *cfg)
>>>>          new_fa->fa_tos = tos;
>>>>          new_fa->fa_type = cfg->fc_type;
>>>>          new_fa->fa_state = 0;
>>>> +
>>>> +       /* (Optionally) offload fib info to switch hardware. */
>>>> +       err = netdev_switch_fib_ipv4_add(key, plen, fi, tos,
>>>> +                                        cfg->fc_type, tb->tb_id);
>>>> +       if (err && err != -EOPNOTSUPP)
>>>> +               goto out_free_new_fa;
>>>> +
>>>>          /*
>>>>           * Insert new entry to the list.
>>>>           */
>>>> @@ -1237,7 +1247,7 @@ int fib_table_insert(struct fib_table *tb, struct
>>>> fib_config *cfg)
>>>>                  fa_head = fib_insert_node(t, key, plen);
>>>>                  if (unlikely(!fa_head)) {
>>>>                          err = -ENOMEM;
>>>> -                       goto out_free_new_fa;
>>>> +                       goto out_sw_fib_del;
>>>>                  }
>>>>          }
>>>>    @@ -1253,6 +1263,8 @@ int fib_table_insert(struct fib_table *tb, struct
>>>> fib_config *cfg)
>>>>    succeeded:
>>>>          return 0;
>>>>    +out_sw_fib_del:
>>>> +       netdev_switch_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type,
>>>> tb->tb_id);
>>>>    out_free_new_fa:
>>>>          kmem_cache_free(fn_alias_kmem, new_fa);
>>>>    out:
>>>> @@ -1529,6 +1541,9 @@ int fib_table_delete(struct fib_table *tb, struct
>>>> fib_config *cfg)
>>>>          rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
>>>>                    &cfg->fc_nlinfo, 0);
>>>>    +     netdev_switch_fib_ipv4_del(key, plen, fa->fa_info, tos,
>>>> +                                  cfg->fc_type, tb->tb_id);
>>>> +
>>>>          list_del_rcu(&fa->fa_list);
>>>>          if (!plen)
>>>> diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
>>>> index d162b21..211a8a0 100644
>>>> --- a/net/switchdev/switchdev.c
>>>> +++ b/net/switchdev/switchdev.c
>>>> @@ -12,6 +12,7 @@
>>>>    #include <linux/types.h>
>>>>    #include <linux/init.h>
>>>>    #include <linux/netdevice.h>
>>>> +#include <net/ip_fib.h>
>>>>    #include <net/switchdev.h>
>>>>      /**
>>>> @@ -50,3 +51,91 @@ int netdev_switch_port_stp_update(struct net_device
>>>> *dev, u8 state)
>>>>          return ops->ndo_switch_port_stp_update(dev, state);
>>>>    }
>>>>    EXPORT_SYMBOL(netdev_switch_port_stp_update);
>>>> +
>>>> +static struct net_device *netdev_switch_get_by_fib_dev(struct net_device
>>>> *dev)
>>>> +{
>>>> +       const struct net_device_ops *ops = dev->netdev_ops;
>>>> +       struct net_device *lower_dev;
>>>> +       struct net_device *port_dev;
>>>> +       struct list_head *iter;
>>>> +
>>>> +       /* Recursively search from fib_dev down until we find
>>>> +        * a sw port dev.  (A sw port dev supports
>>>> +        * ndo_switch_parent_id_get).
>>>> +        */
>>>> +
>>>> +       if (ops->ndo_switch_parent_id_get)
>>>> +               return dev;
>>>> +
>>>> +       netdev_for_each_lower_dev(dev, lower_dev, iter) {
>>>> +               port_dev = netdev_switch_get_by_fib_dev(lower_dev);
>>>> +               if (port_dev)
>>>> +                       return port_dev;
>>>> +       }
>>>> +
>>>> +       return NULL;
>>>> +}
>>>> +
>>>> +/**
>>>> + *     netdev_switch_fib_ipv4_add - Add IPv4 route entry to switch
>>>> + *
>>>> + *     @dst: route's IPv4 destination address
>>>> + *     @dst_len: destination address length (prefix length)
>>>> + *     @fi: route FIB info structure
>>>> + *     @tos: route TOS
>>>> + *     @type: route type
>>>> + *     @tb_id: route table ID
>>>> + *
>>>> + *     Add IPv4 route entry to switch device.
>>>> + */
>>>> +int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
>>>> +                              u8 tos, u8 type, u32 tb_id)
>>>> +{
>>>> +       struct net_device *dev;
>>>> +       const struct net_device_ops *ops;
>>>> +       int err = -EOPNOTSUPP;
>>>> +
>>>> +       dev = netdev_switch_get_by_fib_dev(fi->fib_dev);
>>>> +       if (!dev)
>>>> +               return -EOPNOTSUPP;
>>>> +       ops = dev->netdev_ops;
>>>> +
>>>> +       if (ops->ndo_switch_fib_ipv4_add)
>>>> +               err = ops->ndo_switch_fib_ipv4_add(dev, htonl(dst),
>>>> dst_len,
>>>> +                                                  fi, tos, type, tb_id);
>>>> +
>>>> +       return err;
>>>> +}
>>>> +EXPORT_SYMBOL(netdev_switch_fib_ipv4_add);
>>>> +
>>>> +/**
>>>> + *     netdev_switch_fib_ipv4_del - Delete IPv4 route entry from switch
>>>> + *
>>>> + *     @dst: route's IPv4 destination address
>>>> + *     @dst_len: destination address length (prefix length)
>>>> + *     @fi: route FIB info structure
>>>> + *     @tos: route TOS
>>>> + *     @type: route type
>>>> + *     @tb_id: route table ID
>>>> + *
>>>> + *     Delete IPv4 route entry from switch device.
>>>> + */
>>>> +int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
>>>> +                              u8 tos, u8 type, u32 tb_id)
>>>> +{
>>>> +       struct net_device *dev;
>>>> +       const struct net_device_ops *ops;
>>>> +       int err = -EOPNOTSUPP;
>>>> +
>>>> +       dev = netdev_switch_get_by_fib_dev(fi->fib_dev);
>>>> +       if (!dev)
>>>> +               return -EOPNOTSUPP;
>>>> +       ops = dev->netdev_ops;
>>>> +
>>>> +       if (ops->ndo_switch_fib_ipv4_del)
>>>> +               err = ops->ndo_switch_fib_ipv4_del(dev, htonl(dst),
>>>> dst_len,
>>>> +                                                  fi, tos, type, tb_id);
>>>> +
>>>> +       return err;
>>>> +}
>>>> +EXPORT_SYMBOL(netdev_switch_fib_ipv4_del);
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe netdev" in
>> the body of a message to majordomo@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> N�����r��y���b�X��ǧv�^�)޺{.n�+���z�^�)���w*\x1fjg���\x1e�����ݢj/���z�ޖ��2�ޙ���&�)ߡ�a��\x7f��\x1e�G���h�\x0f�j:+v���w�٥

^ permalink raw reply

* Re: [PATCH 3/4] net: ethernet: cpsw: split out IRQ handler
From: Dave Taht @ 2015-01-02 22:56 UTC (permalink / raw)
  To: Felipe Balbi; +Cc: netdev@vger.kernel.org, Linux OMAP Mailing List
In-Reply-To: <20150102190350.GC4920@saruman>

On Fri, Jan 2, 2015 at 11:03 AM, Felipe Balbi <balbi@ti.com> wrote:
> Hi,
>
> (please use reply-all to keep mailing lists in Cc, also avoid
> top-posting)

I am trying not to read netdev right now... and failing, obviously.

>
> On Fri, Jan 02, 2015 at 10:58:29AM -0800, Dave Taht wrote:
>> The beaglebone only has a 100mbit phy, so you aren't going to get more
>> than that.
>
> very true :-) Still, with AM437x SK which is definitely GigE, I'm
> getting 201Mbits/sec.
>
>> (so do a lot of IoT devices).
>>
>> So you have the two patches that went by on BQL and on NAPI for the beagle?
>
> no, got any pointers ?

the relevant thread was "am335x: cpsw: phy ignores max-speed setting"

and the initial very small BQL enablement patch was here:

https://patchwork.ozlabs.org/patch/407640/

(it needed a saner treatment of a failure to dma something in
cpsw_tx_packet_submit  - the patch as is has also been part of nelsons
trees for the beaglebone for a while)

But it was rightly pointed out later in the thread that this change

+#define CPSW_POLL_WEIGHT 16

made for the biggest part of the improvement, and someone else on the
thread proposed handling that more dynamically for 100mbit phys with
another patch (that I can't find at the moment)

... but the root cause of the excessive latency in this driver was the
single tx/rx dma queue, which you are addressing  in your patch set.
So if you glop on more of the above, mo better, perhaps you will win
bigger.

I will try to slice out some time to boot up a beagle on net-next next week.

>> On Fri, Jan 2, 2015 at 10:55 AM, Felipe Balbi <balbi@ti.com> wrote:
>> > Hi,
>> >
>> > On Fri, Jan 02, 2015 at 10:49:49AM -0800, Dave Taht wrote:
>> >> +1.
>> >>
>> >> We'd had a thread on netdev (can't find it now) where we discussed
>> >> adding BQL support and also something saner for the NAPI handling to
>> >> this driver.
>> >
>> > yeah, currently it is completely borked. I'm on a gigabit network and I'm
>> > getting 94Mbits/sec, total crap.
>> >
>> >> Initial results for the beaglebone black were pretty spectacular, and
>> >> it does look like this is way cleaner infrastructure underneath to deal
>> >> with. Are you testing
>> >
>> > cool, if I knew more about networking I'd certainly help, but I can help
>> > testing for sure, just keep me in Cc ;-)
>> >
>> >> on the beaglebone black.? do you remember that convo?
>> >
>> > yeah, testing on beagleboneblack and AM437x SK.
>> >
>> > cheers
>> >
>> >> On Fri, Jan 2, 2015 at 10:10 AM, Felipe Balbi <balbi@ti.com> wrote:
>> >> > Now we can introduce dedicated IRQ handlers
>> >> > for each of the IRQ events. This helps with
>> >> > cleaning up a little bit of the clutter in
>> >> > cpsw_interrupt() while also making sure that
>> >> > TX IRQs will try to handle TX buffers while
>> >> > RX IRQs will try to handle RX buffers.
>> >> >
>> >> > Signed-off-by: Felipe Balbi <balbi@ti.com>
>> >> > ---
>> >> >  drivers/net/ethernet/ti/cpsw.c | 41 ++++++++++++++++++++++++++++++-----------
>> >> >  1 file changed, 30 insertions(+), 11 deletions(-)
>> >> >
>> >> > diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
>> >> > index 6e04128..c9081bd 100644
>> >> > --- a/drivers/net/ethernet/ti/cpsw.c
>> >> > +++ b/drivers/net/ethernet/ti/cpsw.c
>> >> > @@ -754,18 +754,36 @@ requeue:
>> >> >                 dev_kfree_skb_any(new_skb);
>> >> >  }
>> >> >
>> >> > -static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
>> >> > +static irqreturn_t cpsw_dummy_interrupt(int irq, void *dev_id)
>> >> >  {
>> >> >         struct cpsw_priv *priv = dev_id;
>> >> >         int value = irq - priv->irqs_table[0];
>> >> >
>> >> > -       /* NOTICE: Ending IRQ here. The trick with the 'value' variable above
>> >> > -        * is to make sure we will always write the correct value to the EOI
>> >> > -        * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2
>> >> > -        * for TX Interrupt and 3 for MISC Interrupt.
>> >> > -        */
>> >> >         cpdma_ctlr_eoi(priv->dma, value);
>> >> >
>> >> > +       return IRQ_HANDLED;
>> >> > +}
>> >> > +
>> >> > +static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
>> >> > +{
>> >> > +       struct cpsw_priv *priv = dev_id;
>> >> > +
>> >> > +       cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
>> >> > +       cpdma_chan_process(priv->txch, 128);
>> >> > +
>> >> > +       priv = cpsw_get_slave_priv(priv, 1);
>> >> > +       if (priv)
>> >> > +               cpdma_chan_process(priv->txch, 128);
>> >> > +
>> >> > +       return IRQ_HANDLED;
>> >> > +}
>> >> > +
>> >> > +static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
>> >> > +{
>> >> > +       struct cpsw_priv *priv = dev_id;
>> >> > +
>> >> > +       cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
>> >> > +
>> >> >         cpsw_intr_disable(priv);
>> >> >         if (priv->irq_enabled == true) {
>> >> >                 cpsw_disable_irq(priv);
>> >> > @@ -1617,7 +1635,8 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
>> >> >
>> >> >         cpsw_intr_disable(priv);
>> >> >         cpdma_ctlr_int_ctrl(priv->dma, false);
>> >> > -       cpsw_interrupt(ndev->irq, priv);
>> >> > +       cpsw_rx_interrupt(priv->irq[1], priv);
>> >> > +       cpsw_tx_interrupt(priv->irq[2], priv);
>> >> >         cpdma_ctlr_int_ctrl(priv->dma, true);
>> >> >         cpsw_intr_enable(priv);
>> >> >  }
>> >> > @@ -2351,7 +2370,7 @@ static int cpsw_probe(struct platform_device *pdev)
>> >> >                 goto clean_ale_ret;
>> >> >
>> >> >         priv->irqs_table[0] = irq;
>> >> > -       ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
>> >> > +       ret = devm_request_irq(&pdev->dev, irq, cpsw_dummy_interrupt,
>> >> >                         0, dev_name(&pdev->dev), priv);
>> >> >         if (ret < 0) {
>> >> >                 dev_err(priv->dev, "error attaching irq (%d)\n", ret);
>> >> > @@ -2363,7 +2382,7 @@ static int cpsw_probe(struct platform_device *pdev)
>> >> >                 goto clean_ale_ret;
>> >> >
>> >> >         priv->irqs_table[1] = irq;
>> >> > -       ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
>> >> > +       ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt,
>> >> >                         0, dev_name(&pdev->dev), priv);
>> >> >         if (ret < 0) {
>> >> >                 dev_err(priv->dev, "error attaching irq (%d)\n", ret);
>> >> > @@ -2375,7 +2394,7 @@ static int cpsw_probe(struct platform_device *pdev)
>> >> >                 goto clean_ale_ret;
>> >> >
>> >> >         priv->irqs_table[2] = irq;
>> >> > -       ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
>> >> > +       ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt,
>> >> >                         0, dev_name(&pdev->dev), priv);
>> >> >         if (ret < 0) {
>> >> >                 dev_err(priv->dev, "error attaching irq (%d)\n", ret);
>> >> > @@ -2387,7 +2406,7 @@ static int cpsw_probe(struct platform_device *pdev)
>> >> >                 goto clean_ale_ret;
>> >> >
>> >> >         priv->irqs_table[3] = irq;
>> >> > -       ret = devm_request_irq(&pdev->dev, irq, cpsw_interrupt,
>> >> > +       ret = devm_request_irq(&pdev->dev, irq, cpsw_dummy_interrupt,
>> >> >                         0, dev_name(&pdev->dev), priv);
>> >> >         if (ret < 0) {
>> >> >                 dev_err(priv->dev, "error attaching irq (%d)\n", ret);
>> >> > --
>> >> > 2.2.0
>> >> >
>> >> > --
>> >> > To unsubscribe from this list: send the line "unsubscribe netdev" in
>> >> > the body of a message to majordomo@vger.kernel.org
>> >> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >>
>> >>
>> >>
>> >> --
>> >> Dave Täht
>> >>
>> >> http://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks
>> >
>> > --
>> > balbi
>>
>>
>>
>> --
>> Dave Täht
>>
>> http://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks
>
> --
> balbi



-- 
Dave Täht

http://www.bufferbloat.net/projects/bloat/wiki/Upcoming_Talks

^ permalink raw reply

* Re: [PATCH net-next] openvswitch: Do not set skb ignore_df
From: Pravin Shelar @ 2015-01-02 22:44 UTC (permalink / raw)
  To: Jesse Gross; +Cc: David Miller, netdev, dev@openvswitch.org
In-Reply-To: <CAEP_g=-dmtKbC+GkOsr1Z+gUB6Q0noFP_O5PhXmhs_w3j0BV0Q@mail.gmail.com>

On Fri, Jan 2, 2015 at 12:03 PM, Jesse Gross <jesse@nicira.com> wrote:
> On Fri, Jan 2, 2015 at 1:27 PM, Pravin B Shelar <pshelar@nicira.com> wrote:
>> Tunnel transmit code clears this bit, so setting ignore_df has
>> no effect.
>>
>> Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
>
> Is it actually right for the bit to be cleared though? As discussed in
> the previous thread on tunnel MTU handling, I think that fragmentation
> should be done as a very last resort.

ok, I missed the discussion, I will send another patch.

^ permalink raw reply

* Re: linux-next: build failure after merge of the net-next tree
From: Sedat Dilek @ 2015-01-02 22:21 UTC (permalink / raw)
  To: Stephen Rothwell
  Cc: David Miller, netdev@vger.kernel.org, linux-next, LKML,
	Richard Cochran, Jeff Kirsher
In-Reply-To: <20150103091101.04b2c11e@canb.auug.org.au>

On Fri, Jan 2, 2015 at 11:11 PM, Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> Hi all,
>
> After merging the net-next tree, today's linux-next build (powerpc
> ppc64_defconfig) failed like this:
>
> drivers/net/ethernet/mellanox/mlx4/en_clock.c: In function 'mlx4_en_init_timestamp':
> drivers/net/ethernet/mellanox/mlx4/en_clock.c:249:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
>   mdev->cycles.mask = CLOCKSOURCE_MASK(48);
>   ^
> drivers/net/ethernet/mellanox/mlx4/en_clock.c:257:3: error: implicit declaration of function 'clocksource_khz2mult' [-Werror=implicit-function-declaration]
>    clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
>    ^
> drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c: In function 'ixgbe_ptp_start_cyclecounter':
> drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c:796:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
>   adapter->cc.mask = CLOCKSOURCE_MASK(64);
>   ^
>
> Presumably caused by commit 74d23cc704d1 ("time: move the
> timecounter/cyclecounter code into its own file").
>

Happy new 2015 Stephen...

[ Over the last few days I revived my linux-next build script, prompted by
wanting to test the block-loop-mq v3 patchset against next-20141231 ]

Just FYI...

Richard posted a new patch series, '[PATCH net-next 0/7] Fixing the
"Time Counter fixes and improvements"', on linux-nextdev that fixes this [1].

- Sedat -

[1] https://lkml.org/lkml/2015/1/1/27

> I added the following commit for today:
>
> From: Stephen Rothwell <sfr@canb.auug.org.au>
> Date: Sat, 3 Jan 2015 09:07:21 +1100
> Subject: [PATCH] ixgbe_ptp, mlx4: Include clocksource.h to get CLOCKSOURCE_MASK
>
> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c  | 1 +
>  drivers/net/ethernet/mellanox/mlx4/en_clock.c | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> index 47c29eaaa140..73548280cbae 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
> @@ -25,6 +25,7 @@
>    Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
>
>  *******************************************************************************/
> +#include <linux/clocksource.h>
>  #include "ixgbe.h"
>  #include <linux/ptp_classify.h>
>
> diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
> index e9cce4f72b24..7c6ef4b48f8e 100644
> --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
> +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
> @@ -31,6 +31,7 @@
>   *
>   */
>
> +#include <linux/clocksource.h>
>  #include <linux/mlx4/device.h>
>
>  #include "mlx4_en.h"
> --
> 2.1.4
>
> --
> Cheers,
> Stephen Rothwell                    sfr@canb.auug.org.au

^ permalink raw reply

* Re: How to fix CHECK warning: testing a 'safe expression'
From: Josh Triplett @ 2015-01-02 22:20 UTC (permalink / raw)
  To: Murali Karicheri; +Cc: netdev, linux-kernel, linux-sparse
In-Reply-To: <54A6B06D.50102@ti.com>

+linux-sparse

On Fri, Jan 02, 2015 at 09:51:25AM -0500, Murali Karicheri wrote:
> On 12/16/2014 01:23 PM, Murali Karicheri wrote:
> >netdev maintainers,
> >
> >I got a comment to address CHECK warning and wondering how to address
> >'warning: testing a 'safe expression' which appears when using
> >IS_ERR_OR_NULL(foo)
> >
> >where foo is defined as
> >
> >struct foo_type *foo;
> >
> >The foo gets assigned only NULL or ERR_PTR(error code). So I believe the
> >usage is correct. But then how do I make the CHECK happy of its usage?
> >
> >I have tried doing a grep on the current usage of IS_ERR_OR_NULL() and
> >found that 276 of them cause this warning in the v3.18 version of the kernel
> >that I am using
> >
> >$ grep -r "warning: testing a 'safe expression" * | wc -l
> >276
> >
> >1) Can someone explain what this warning means?
> >
> >2) Is it acceptable to post patches to netdev list with this warning?
> >
> >3) if not, how is this expected to be fixed? Any example usage to fix
> >this warning will be helpful.
> >
> >Thanks in advance for
> 
> 
> -- 
> Murali Karicheri
> Linux Kernel, Texas Instruments

^ permalink raw reply

* [PATCH] net: ethernet: cpsw: fix hangs with interrupts
From: Felipe Balbi @ 2015-01-02 22:15 UTC (permalink / raw)
  To: David Miller
  Cc: Mugunthan V N, Yegor Yefremov, Linux OMAP Mailing List, netdev,
	Felipe Balbi, stable

The CPSW IP implements pulse-signaled interrupts. Due to
that we must write a correct, pre-defined value to the
CPDMA_MACEOIVECTOR register so the controller generates
a pulse on the correct IRQ line to signal the End Of
Interrupt.

The way the driver is written today, all four IRQ lines
are requested using the same IRQ handler and, because of
that, we could fall into situations where a TX IRQ fires
but we tell the controller that we ended an RX IRQ (or
vice-versa). This situation triggers an IRQ storm on the
reserved IRQ 127 of INTC which will in turn call ack_bad_irq()
which will, then, print a ton of:

	unexpected IRQ trap at vector 00

In order to fix the problem, we are moving all calls to
cpdma_ctlr_eoi() inside the IRQ handler and making sure
we *always* write the correct value to the CPDMA_MACEOIVECTOR
register. Note that the algorithm assumes that IRQ numbers and
value-to-be-written-to-EOI are proportional, meaning that a
write of value 0 would trigger an EOI pulse for the RX_THRESHOLD
Interrupt and that's the IRQ number sitting in the 0-th index
of our irqs_table array.

This, however, is safe at least for current implementations of
CPSW so we will refrain from making the check smarter (and, as
a side-effect, slower) until we actually have a platform where
IRQ lines are swapped.

This patch has been tested for several days with AM335x- and
AM437x-based platforms. AM57x was left out because there are
still pending patches to enable ethernet in mainline for that
platform. A read of the TRM confirms the statement in the previous
paragraph.

Reported-by: Yegor Yefremov <yegorslists@googlemail.com>
Fixes: 510a1e7 (drivers: net: davinci_cpdma: acknowledge interrupt properly)
Cc: <stable@vger.kernel.org> # v3.9+
Signed-off-by: Felipe Balbi <balbi@ti.com>
---

should be applied on 'net' for current -rc

 drivers/net/ethernet/ti/cpsw.c | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c560f9a..e61ee83 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -757,6 +757,14 @@ requeue:
 static irqreturn_t cpsw_interrupt(int irq, void *dev_id)
 {
 	struct cpsw_priv *priv = dev_id;
+	int value = irq - priv->irqs_table[0];
+
+	/* NOTICE: Ending IRQ here. The trick with the 'value' variable above
+	 * is to make sure we will always write the correct value to the EOI
+	 * register. Namely 0 for RX_THRESH Interrupt, 1 for RX Interrupt, 2
+	 * for TX Interrupt and 3 for MISC Interrupt.
+	 */
+	cpdma_ctlr_eoi(priv->dma, value);

 	cpsw_intr_disable(priv);
 	if (priv->irq_enabled == true) {
@@ -786,8 +794,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget)
 	int			num_tx, num_rx;

 	num_tx = cpdma_chan_process(priv->txch, 128);
-	if (num_tx)
-		cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);

 	num_rx = cpdma_chan_process(priv->rxch, budget);
 	if (num_rx < budget) {
@@ -795,7 +801,6 @@ static int cpsw_poll(struct napi_struct *napi, int budget)

 		napi_complete(napi);
 		cpsw_intr_enable(priv);
-		cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
 		prim_cpsw = cpsw_get_slave_priv(priv, 0);
 		if (prim_cpsw->irq_enabled == false) {
 			prim_cpsw->irq_enabled = true;
@@ -1310,8 +1315,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	napi_enable(&priv->napi);
 	cpdma_ctlr_start(priv->dma);
 	cpsw_intr_enable(priv);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);

 	prim_cpsw = cpsw_get_slave_priv(priv, 0);
 	if (prim_cpsw->irq_enabled == false) {
@@ -1578,9 +1581,6 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 	cpdma_chan_start(priv->txch);
 	cpdma_ctlr_int_ctrl(priv->dma, true);
 	cpsw_intr_enable(priv);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
-
 }

 static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p)
@@ -1620,9 +1620,6 @@ static void cpsw_ndo_poll_controller(struct net_device *ndev)
 	cpsw_interrupt(ndev->irq, priv);
 	cpdma_ctlr_int_ctrl(priv->dma, true);
 	cpsw_intr_enable(priv);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX);
-	cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX);
-
 }
 #endif

-- 
2.2.0

^ permalink raw reply related

* linux-next: build failure after merge of the net-next tree
From: Stephen Rothwell @ 2015-01-02 22:11 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Richard Cochran, Jeff Kirsher

[-- Attachment #1: Type: text/plain, Size: 2463 bytes --]

Hi all,

After merging the net-next tree, today's linux-next build (powerpc
ppc64_defconfig) failed like this:

drivers/net/ethernet/mellanox/mlx4/en_clock.c: In function 'mlx4_en_init_timestamp':
drivers/net/ethernet/mellanox/mlx4/en_clock.c:249:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
  mdev->cycles.mask = CLOCKSOURCE_MASK(48);
  ^
drivers/net/ethernet/mellanox/mlx4/en_clock.c:257:3: error: implicit declaration of function 'clocksource_khz2mult' [-Werror=implicit-function-declaration]
   clocksource_khz2mult(1000 * dev->caps.hca_core_clock, mdev->cycles.shift);
   ^
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c: In function 'ixgbe_ptp_start_cyclecounter':
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c:796:2: error: implicit declaration of function 'CLOCKSOURCE_MASK' [-Werror=implicit-function-declaration]
  adapter->cc.mask = CLOCKSOURCE_MASK(64);
  ^

Presumably caused by commit 74d23cc704d1 ("time: move the
timecounter/cyclecounter code into its own file").

I added the following commit for today:

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Sat, 3 Jan 2015 09:07:21 +1100
Subject: [PATCH] ixgbe_ptp, mlx4: Include clocksource.h to get CLOCKSOURCE_MASK

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c  | 1 +
 drivers/net/ethernet/mellanox/mlx4/en_clock.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
index 47c29eaaa140..73548280cbae 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
@@ -25,6 +25,7 @@
   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
 
 *******************************************************************************/
+#include <linux/clocksource.h>
 #include "ixgbe.h"
 #include <linux/ptp_classify.h>
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
index e9cce4f72b24..7c6ef4b48f8e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c
@@ -31,6 +31,7 @@
  *
  */
 
+#include <linux/clocksource.h>
 #include <linux/mlx4/device.h>
 
 #include "mlx4_en.h"
-- 
2.1.4

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply related

* Re: tcp: Do not apply TSO segment limit to non-TSO packets
From: Herbert Xu @ 2015-01-02 22:09 UTC (permalink / raw)
  To: David Miller
  Cc: eric.dumazet, thomas.jarosch, netdev, edumazet, steffen.klassert,
	bhutchings
In-Reply-To: <20150102.170629.1474417873101408441.davem@davemloft.net>

On Fri, Jan 02, 2015 at 05:06:29PM -0500, David Miller wrote:
>
> I think the rarity of PMTU events on non-VPN'd connections plays
> a part in how long it took as well.

Maybe in your neck of the woods but certainly in China I observe
loads of PMTU events without involving any VPNs at all :)

In fact even the fibre connections here use PPPOE so PMTU is
sort of unavoidable.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* Re: tcp: Do not apply TSO segment limit to non-TSO packets
From: David Miller @ 2015-01-02 22:06 UTC (permalink / raw)
  To: herbert
  Cc: eric.dumazet, thomas.jarosch, netdev, edumazet, steffen.klassert,
	bhutchings
In-Reply-To: <20150102220107.GA28599@gondor.apana.org.au>

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sat, 3 Jan 2015 09:01:07 +1100

> So has anyone actually observed worse performance with GSO on these
> devices (you could even test on a GSO-capable device simply by
> disabling checksumming)?

Good question.

> Also the fact that this bug took two years to surface means that
> very few people are actually using non-GSO in the real world as
> this bug is easily triggered by a PMTU event.

I think the rarity of PMTU events on non-VPN'd connections plays
a part in how long it took as well.

But I totally accept your point, for sure.

^ permalink raw reply

* Re: [PATCH 0/4] net: cpsw: fix hangs and improve IRQ handling
From: David Miller @ 2015-01-02 22:04 UTC (permalink / raw)
  To: balbi; +Cc: mugunthanvnm, yegorslists, linux-omap, netdev
In-Reply-To: <20150102215335.GI4920@saruman>

From: Felipe Balbi <balbi@ti.com>
Date: Fri, 2 Jan 2015 15:53:35 -0600

> On Fri, Jan 02, 2015 at 04:45:36PM -0500, David Miller wrote:
>> You should instead submit patch #1 all by itself, correctly targeting
>> 'net'.
>> 
>> Then, after the next time I merge 'net' into 'net-next', you can submit
>> the rest of the changes.
> 
> then take patch 1 and I'll resend the others in a couple of weeks, no
> problem.

Please resubmit the patches in the proper manner.

^ permalink raw reply

* Re: tcp: Do not apply TSO segment limit to non-TSO packets
From: Herbert Xu @ 2015-01-02 22:01 UTC (permalink / raw)
  To: David Miller
  Cc: eric.dumazet, thomas.jarosch, netdev, edumazet, steffen.klassert,
	bhutchings
In-Reply-To: <20150102.153655.1853692198479011402.davem@davemloft.net>

On Fri, Jan 02, 2015 at 03:36:55PM -0500, David Miller wrote:
> From: Eric Dumazet <eric.dumazet@gmail.com>
> Date: Fri, 02 Jan 2015 10:24:00 -0800
>
> > Non TSO/GSO path is known to be better for devices unable to perform TX
> > checksumming, as we compute the checksum at the time we copy data from
> > user to kernel (csum_and_copy_from_user() from tcp_sendmsg()).
> 
> Non-SG capable devices suffer in this scenario as well.

Yes I was referring to using GSO on non-SG/non-checksumming devices.
Anything that supports checksum/SG should obviously be using GSO.

IIRC, when I first tested this, GSO was basically on par for the non-SG
case as the overhead of the extra copying was offset by the benefit
of a larger MTU through the stack.

So has anyone actually observed worse performance with GSO on these
devices (you could even test on a GSO-capable device simply by
disabling checksumming)?

Also the fact that this bug took two years to surface means that
very few people are actually using non-GSO in the real world as
this bug is easily triggered by a PMTU event.

Cheers,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox