Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next 2/2] net/sched: cls_flower: Support matching on ICMP type and code
From: Simon Horman @ 2016-12-02 20:33 UTC (permalink / raw)
  To: Jiri Pirko
  Cc: David Miller, netdev, Jay Vosburgh, Veaceslav Falico,
	Andy Gospodarek, Jamal Hadi Salim, Jiri Pirko
In-Reply-To: <20161202191712.GA32484@penelope.horms.nl>

Hi Jiri,

On Fri, Dec 02, 2016 at 08:17:13PM +0100, Simon Horman wrote:
> On Fri, Dec 02, 2016 at 07:38:48PM +0100, Jiri Pirko wrote:
> > Fri, Dec 02, 2016 at 07:05:51PM CET, simon.horman@netronome.com wrote:
> > >Support matching on ICMP type and code.

...

> > This hunk looks like it should be squashed to the previous patch.
> 
> I included it in this patch as it is where these helpers are used
> for the first time. I can shuffle it into the first patch if you prefer;
> I agree it does make sense to put all the dissector changes there.

I moved things around as you suggested and posted v2.

^ permalink raw reply

* [PATCH v2 net-next 1/2] flow dissector: ICMP support
From: Simon Horman @ 2016-12-02 20:31 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	Jamal Hadi Salim, Jiri Pirko, Simon Horman
In-Reply-To: <1480710702-16850-1-git-send-email-simon.horman@netronome.com>

Allow dissection of ICMP(V6) type and code. This re-uses transport layer
port dissection code as although ICMP is not a transport protocol and their
type and code are not ports this allows sharing of both code and storage.

Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 drivers/net/bonding/bond_main.c |  6 +++--
 include/linux/skbuff.h          |  5 +++++
 include/net/flow_dissector.h    | 50 ++++++++++++++++++++++++++++++++++++++---
 net/core/flow_dissector.c       | 34 +++++++++++++++++++++++++---
 net/sched/cls_flow.c            |  4 ++--
 5 files changed, 89 insertions(+), 10 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8029dd4912b6..a6f75cfb2bf7 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3181,7 +3181,8 @@ static bool bond_flow_dissect(struct bonding *bond, struct sk_buff *skb,
 	} else {
 		return false;
 	}
-	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 && proto >= 0)
+	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER34 &&
+	    proto >= 0 && !skb_flow_is_icmp_any(skb, proto))
 		fk->ports.ports = skb_flow_get_ports(skb, noff, proto);
 
 	return true;
@@ -3209,7 +3210,8 @@ u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb)
 		return bond_eth_hash(skb);
 
 	if (bond->params.xmit_policy == BOND_XMIT_POLICY_LAYER23 ||
-	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23)
+	    bond->params.xmit_policy == BOND_XMIT_POLICY_ENCAP23 ||
+	    flow_keys_are_icmp_any(&flow))
 		hash = bond_eth_hash(skb);
 	else
 		hash = (__force u32)flow.ports.ports;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9c535fbccf2c..44a8f69a9198 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1094,6 +1094,11 @@ u32 __skb_get_poff(const struct sk_buff *skb, void *data,
 __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto,
 			    void *data, int hlen_proto);
 
+static inline bool skb_flow_is_icmp_any(const struct sk_buff *skb, u8 ip_proto)
+{
+	return flow_protos_are_icmp_any(skb->protocol, ip_proto);
+}
+
 static inline __be32 skb_flow_get_ports(const struct sk_buff *skb,
 					int thoff, u8 ip_proto)
 {
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index c4f31666afd2..5540dfa18872 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -2,6 +2,7 @@
 #define _NET_FLOW_DISSECTOR_H
 
 #include <linux/types.h>
+#include <linux/in.h>
 #include <linux/in6.h>
 #include <uapi/linux/if_ether.h>
 
@@ -89,10 +90,15 @@ struct flow_dissector_key_addrs {
 };
 
 /**
- * flow_dissector_key_tp_ports:
- *	@ports: port numbers of Transport header
+ * flow_dissector_key_ports:
+ *	@ports: port numbers of Transport header or
+ *		type and code of ICMP header
+ *		ports: source (high) and destination (low) port numbers
  *		src: source port number
  *		dst: destination port number
+ *		icmp: ICMP type (high) and code (low)
+ *		type: ICMP type
+ *		type: ICMP code
  */
 struct flow_dissector_key_ports {
 	union {
@@ -101,6 +107,11 @@ struct flow_dissector_key_ports {
 			__be16 src;
 			__be16 dst;
 		};
+		__be16 icmp;
+		struct {
+			u8 type;
+			u8 code;
+		};
 	};
 };
 
@@ -188,9 +199,42 @@ struct flow_keys_digest {
 void make_flow_keys_digest(struct flow_keys_digest *digest,
 			   const struct flow_keys *flow);
 
+static inline bool flow_protos_are_icmpv4(__be16 n_proto, u8 ip_proto)
+{
+	return n_proto == htons(ETH_P_IP) && ip_proto == IPPROTO_ICMP;
+}
+
+static inline bool flow_protos_are_icmpv6(__be16 n_proto, u8 ip_proto)
+{
+	return n_proto == htons(ETH_P_IPV6) && ip_proto == IPPROTO_ICMPV6;
+}
+
+static inline bool flow_protos_are_icmp_any(__be16 n_proto, u8 ip_proto)
+{
+	return flow_protos_are_icmpv4(n_proto, ip_proto) ||
+		flow_protos_are_icmpv6(n_proto, ip_proto);
+}
+
+static inline bool flow_basic_key_is_icmpv4(const struct flow_dissector_key_basic *basic)
+{
+	return flow_protos_are_icmpv4(basic->n_proto, basic->ip_proto);
+}
+
+static inline bool flow_basic_key_is_icmpv6(const struct flow_dissector_key_basic *basic)
+{
+	return flow_protos_are_icmpv6(basic->n_proto, basic->ip_proto);
+}
+
+static inline bool flow_keys_are_icmp_any(const struct flow_keys *keys)
+{
+	return flow_protos_are_icmp_any(keys->basic.n_proto,
+					keys->basic.ip_proto);
+}
+
 static inline bool flow_keys_have_l4(const struct flow_keys *keys)
 {
-	return (keys->ports.ports || keys->tags.flow_label);
+	return (!flow_keys_are_icmp_any(keys) && keys->ports.ports) ||
+		keys->tags.flow_label;
 }
 
 u32 flow_hash_from_keys(struct flow_keys *keys);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 1eb6f949e5b2..0584b4bb4390 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -58,6 +58,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
 /**
+ * skb_flow_get_be16 - extract be16 entity
+ * @skb: sk_buff to extract from
+ * @poff: offset to extract at
+ * @data: raw buffer pointer to the packet
+ * @hlen: packet header length
+ *
+ * The function will try to retrieve a be32 entity at
+ * offset poff
+ */
+__be16 skb_flow_get_be16(const struct sk_buff *skb, int poff, void *data,
+			 int hlen)
+{
+	__be16 *u, _u;
+
+	u = __skb_header_pointer(skb, poff, sizeof(_u), data, hlen, &_u);
+	if (u)
+		return *u;
+
+	return 0;
+}
+
+/**
  * __skb_flow_get_ports - extract the upper layer ports and return them
  * @skb: sk_buff to extract the ports from
  * @thoff: transport header offset
@@ -542,8 +564,13 @@ bool __skb_flow_dissect(const struct sk_buff *skb,
 		key_ports = skb_flow_dissector_target(flow_dissector,
 						      FLOW_DISSECTOR_KEY_PORTS,
 						      target_container);
-		key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
-							data, hlen);
+		if (flow_protos_are_icmp_any(proto, ip_proto))
+			key_ports->icmp = skb_flow_get_be16(skb, nhoff, data,
+							    hlen);
+		else
+			key_ports->ports = __skb_flow_get_ports(skb, nhoff,
+								ip_proto, data,
+								hlen);
 	}
 
 out_good:
@@ -718,7 +745,8 @@ void make_flow_keys_digest(struct flow_keys_digest *digest,
 
 	data->n_proto = flow->basic.n_proto;
 	data->ip_proto = flow->basic.ip_proto;
-	data->ports = flow->ports.ports;
+	if (flow_keys_have_l4(flow))
+		data->ports = flow->ports.ports;
 	data->src = flow->addrs.v4addrs.src;
 	data->dst = flow->addrs.v4addrs.dst;
 }
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index e39672394c7b..a1a7ae71aa62 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -96,7 +96,7 @@ static u32 flow_get_proto(const struct sk_buff *skb,
 static u32 flow_get_proto_src(const struct sk_buff *skb,
 			      const struct flow_keys *flow)
 {
-	if (flow->ports.ports)
+	if (!flow_keys_are_icmp_any(flow) && flow->ports.ports)
 		return ntohs(flow->ports.src);
 
 	return addr_fold(skb->sk);
@@ -105,7 +105,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb,
 static u32 flow_get_proto_dst(const struct sk_buff *skb,
 			      const struct flow_keys *flow)
 {
-	if (flow->ports.ports)
+	if (!flow_keys_are_icmp_any(flow) && flow->ports.ports)
 		return ntohs(flow->ports.dst);
 
 	return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb);
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related

* [PATCH v2 net-next 0/2] net/sched: cls_flower: Support matching on ICMP
From: Simon Horman @ 2016-12-02 20:31 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	Jamal Hadi Salim, Jiri Pirko, Simon Horman

Hi,

this series add supports for matching on ICMP type and code to cls_flower.
This is modeled on existing support for matching on L4 ports. The updates
to the dissector are intended to allow for code and storage re-use.

Changes v1->v2:
* Add all flow dissector helpers in first patch

Change RFC->v1:
* Drop RFC designation after positive review from Jiri

Simon Horman (2):
  flow dissector: ICMP support
  net/sched: cls_flower: Support matching on ICMP type and code

 drivers/net/bonding/bond_main.c |  6 +++--
 include/linux/skbuff.h          |  5 +++++
 include/net/flow_dissector.h    | 50 ++++++++++++++++++++++++++++++++++++++---
 include/uapi/linux/pkt_cls.h    | 10 +++++++++
 net/core/flow_dissector.c       | 34 +++++++++++++++++++++++++---
 net/sched/cls_flow.c            |  4 ++--
 net/sched/cls_flower.c          | 42 ++++++++++++++++++++++++++++++++++
 7 files changed, 141 insertions(+), 10 deletions(-)

-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply

* [PATCH v2 net-next 2/2] net/sched: cls_flower: Support matching on ICMP type and code
From: Simon Horman @ 2016-12-02 20:31 UTC (permalink / raw)
  To: David Miller
  Cc: netdev, Jay Vosburgh, Veaceslav Falico, Andy Gospodarek,
	Jamal Hadi Salim, Jiri Pirko, Simon Horman
In-Reply-To: <1480710702-16850-1-git-send-email-simon.horman@netronome.com>

Support matching on ICMP type and code.

Example usage:

tc qdisc add dev eth0 ingress

tc filter add dev eth0 protocol ip parent ffff: flower \
	indev eth0 ip_proto icmp type 8 code 0 action drop

tc filter add dev eth0 protocol ipv6 parent ffff: flower \
	indev eth0 ip_proto icmpv6 type 128 code 0 action drop

Signed-off-by: Simon Horman <simon.horman@netronome.com>
---
 include/uapi/linux/pkt_cls.h | 10 ++++++++++
 net/sched/cls_flower.c       | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 52 insertions(+)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 86786d45ee66..58160fe80b80 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -457,6 +457,16 @@ enum {
 	TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK,	/* be16 */
 	TCA_FLOWER_KEY_ENC_UDP_DST_PORT,	/* be16 */
 	TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK,	/* be16 */
+
+	TCA_FLOWER_KEY_ICMPV4_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_CODE_MASK,/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE,	/* u8 */
+	TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,/* u8 */
+
 	__TCA_FLOWER_MAX,
 };
 
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 1cacfa5c95f3..f639761eacfb 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -361,6 +361,14 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_UDP_SRC_PORT_MASK]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_ENC_UDP_DST_PORT_MASK]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_ICMPV4_TYPE]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV4_TYPE_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV4_CODE]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV4_CODE_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV6_TYPE]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV6_TYPE_MASK] = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV6_CODE]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_ICMPV6_CODE_MASK] = { .type = NLA_U8 },
 };
 
 static void fl_set_key_val(struct nlattr **tb,
@@ -477,6 +485,20 @@ static int fl_set_key(struct net *net, struct nlattr **tb,
 		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_SCTP_DST,
 			       &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
 			       sizeof(key->tp.dst));
+	} else if (flow_basic_key_is_icmpv4(&key->basic)) {
+		fl_set_key_val(tb, &key->tp.type, TCA_FLOWER_KEY_ICMPV4_TYPE,
+			       &mask->tp.type, TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+			       sizeof(key->tp.type));
+		fl_set_key_val(tb, &key->tp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+			       &mask->tp.code, TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+			       sizeof(key->tp.code));
+	} else if (flow_basic_key_is_icmpv6(&key->basic)) {
+		fl_set_key_val(tb, &key->tp.type, TCA_FLOWER_KEY_ICMPV6_TYPE,
+			       &mask->tp.type, TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+			       sizeof(key->tp.type));
+		fl_set_key_val(tb, &key->tp.code, TCA_FLOWER_KEY_ICMPV4_CODE,
+			       &mask->tp.code, TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+			       sizeof(key->tp.code));
 	}
 
 	if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] ||
@@ -950,6 +972,26 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 				  &mask->tp.dst, TCA_FLOWER_KEY_SCTP_DST_MASK,
 				  sizeof(key->tp.dst))))
 		goto nla_put_failure;
+	else if (flow_basic_key_is_icmpv4(&key->basic) &&
+		 (fl_dump_key_val(skb, &key->tp.type,
+				  TCA_FLOWER_KEY_ICMPV4_TYPE, &mask->tp.type,
+				  TCA_FLOWER_KEY_ICMPV4_TYPE_MASK,
+				  sizeof(key->tp.type)) ||
+		  fl_dump_key_val(skb, &key->tp.code,
+				  TCA_FLOWER_KEY_ICMPV4_CODE, &mask->tp.code,
+				  TCA_FLOWER_KEY_ICMPV4_CODE_MASK,
+				  sizeof(key->tp.code))))
+		goto nla_put_failure;
+	else if (flow_basic_key_is_icmpv6(&key->basic) &&
+		 (fl_dump_key_val(skb, &key->tp.type,
+				  TCA_FLOWER_KEY_ICMPV6_TYPE, &mask->tp.type,
+				  TCA_FLOWER_KEY_ICMPV6_TYPE_MASK,
+				  sizeof(key->tp.type)) ||
+		  fl_dump_key_val(skb, &key->tp.code,
+				  TCA_FLOWER_KEY_ICMPV6_CODE, &mask->tp.code,
+				  TCA_FLOWER_KEY_ICMPV6_CODE_MASK,
+				  sizeof(key->tp.code))))
+		goto nla_put_failure;
 
 	if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS &&
 	    (fl_dump_key_val(skb, &key->enc_ipv4.src,
-- 
2.7.0.rc3.207.g0ac5344

^ permalink raw reply related

* [PATCH v3 06/13] net: ethernet: ti: cpts: disable cpts when unregistered
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

The cpts now is left enabled after unregistration.
Hence, disable it in cpts_unregister().

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 3dda6d5..d3c1ac5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -404,6 +404,10 @@ void cpts_unregister(struct cpts *cpts)
 		ptp_clock_unregister(cpts->clock);
 		cancel_delayed_work_sync(&cpts->overflow_work);
 	}
+
+	cpts_write32(cpts, 0, int_enable);
+	cpts_write32(cpts, 0, control);
+
 	if (cpts->refclk)
 		cpts_clk_release(cpts);
 }
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 11/13] clocksource: export the clocks_calc_mult_shift to use by timestamp code
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner, John Stultz,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

From: Murali Karicheri <m-karicheri2@ti.com>

The CPSW CPTS driver is capable of doing timestamping on tx/rx packets and
requires to know mult and shift factors for timestamp conversion from raw
value to nanoseconds (ptp clock). Now these mult and shift factors are
calculated manually and provided through DT, which makes very hard to
support of a lot number of platforms, especially if CPTS refclk is not the
same for some kind of boards and depends on efuse settings (Keystone 2
platforms). Hence, export clocks_calc_mult_shift() to allow drivers like
CPSW CPTS (and other ptp drivesr) to benefit from automaitc calculation of
mult and shift factors.

Cc: John Stultz <john.stultz@linaro.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 kernel/time/clocksource.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7e4fad7..150242c 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -89,6 +89,7 @@ clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 maxsec)
 	*mult = tmp;
 	*shift = sft;
 }
+EXPORT_SYMBOL_GPL(clocks_calc_mult_shift);
 
 /*[Clocksource internal variables]---------
  * curr_clocksource:
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 13/13] net: ethernet: ti: cpts: fix overflow check period
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko, John Stultz
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

The CPTS drivers uses 8sec period for overflow checking with
assumption that CPTS retclk will not exceed 500MHz. But that's not
true on some TI platforms (Kesytone 2). As result, it is possible that
CPTS counter will overflow more than once between two readings.

Hence, fix it by selecting overflow check period dynamically as
max_sec_before_overflow/2, where
 max_sec_before_overflow = max_counter_val / rftclk_freq.

Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 10 +++++++---
 drivers/net/ethernet/ti/cpts.h |  4 +---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 806241b..e9a8b12 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -245,7 +245,7 @@ static void cpts_overflow_check(struct work_struct *work)
 
 	cpts_ptp_gettime(&cpts->info, &ts);
 	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -382,8 +382,7 @@ int cpts_register(struct cpts *cpts)
 	}
 	cpts->phc_index = ptp_clock_index(cpts->clock);
 
-	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-
+	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 	return 0;
 
 err_ptp:
@@ -427,6 +426,11 @@ static void cpts_calc_mult_shift(struct cpts *cpts)
 	if (maxsec > 10)
 		maxsec = 10;
 
+	/* Calc overflow check period (maxsec / 2) */
+	cpts->ov_check_period = (HZ * maxsec) / 2;
+	dev_info(cpts->dev, "cpts: overflow check period %lu (jiffies)\n",
+		 cpts->ov_check_period);
+
 	if (cpts->cc_mult || cpts->cc.shift)
 		return;
 
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 5da23af..c96eca2 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -97,9 +97,6 @@ enum {
 	CPTS_EV_TX,   /* Ethernet Transmit Event */
 };
 
-/* This covers any input clock up to about 500 MHz. */
-#define CPTS_OVERFLOW_PERIOD (HZ * 8)
-
 #define CPTS_FIFO_DEPTH 16
 #define CPTS_MAX_EVENTS 32
 
@@ -127,6 +124,7 @@ struct cpts {
 	struct list_head events;
 	struct list_head pool;
 	struct cpts_event pool_data[CPTS_MAX_EVENTS];
+	unsigned long ov_check_period;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 12/13] net: ethernet: ti: cpts: calc mult and shift from refclk freq
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko, John Stultz
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

The cyclecounter mult and shift values can be calculated based on the
CPTS rfclk frequency and timekeepnig framework provides required algos
and API's.

Hence, calc mult and shift basing on CPTS rfclk frequency if both
cpts_clock_shift and cpts_clock_mult properties are not provided in DT (the
basis of calculation algorithm is borrowed from
__clocksource_update_freq_scale() commit 7d2f944a2b83 ("clocksource:
Provide a generic mult/shift factor calculation")). After this change
cpts_clock_shift and cpts_clock_mult DT properties will become optional.

Cc: John Stultz <john.stultz@linaro.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 Documentation/devicetree/bindings/net/cpsw.txt |  8 ++--
 drivers/net/ethernet/ti/cpts.c                 | 53 +++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cpsw.txt b/Documentation/devicetree/bindings/net/cpsw.txt
index 5ad439f..ebda7c9 100644
--- a/Documentation/devicetree/bindings/net/cpsw.txt
+++ b/Documentation/devicetree/bindings/net/cpsw.txt
@@ -20,8 +20,6 @@ Required properties:
 - slaves		: Specifies number for slaves
 - active_slave		: Specifies the slave to use for time stamping,
 			  ethtool and SIOCGMIIPHY
-- cpts_clock_mult	: Numerator to convert input clock ticks into nanoseconds
-- cpts_clock_shift	: Denominator to convert input clock ticks into nanoseconds
 
 Optional properties:
 - ti,hwmods		: Must be "cpgmac0"
@@ -35,7 +33,11 @@ Optional properties:
 			  For example in dra72x-evm, pcf gpio has to be
 			  driven low so that cpsw slave 0 and phy data
 			  lines are connected via mux.
-
+- cpts_clock_mult	: Numerator to convert input clock ticks into nanoseconds
+- cpts_clock_shift	: Denominator to convert input clock ticks into nanoseconds
+			  Mult and shift will be calculated basing on CPTS
+			  rftclk frequency if both cpts_clock_shift and
+			  cpts_clock_mult properties are not provided.
 
 Slave Properties:
 Required properties:
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 5d5c46d..806241b 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -409,21 +409,60 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static void cpts_calc_mult_shift(struct cpts *cpts)
+{
+	u64 frac, maxsec, ns;
+	u32 freq, mult, shift;
+
+	freq = clk_get_rate(cpts->refclk);
+
+	/* Calc the maximum number of seconds which we can run before
+	 * wrapping around.
+	 */
+	maxsec = cpts->cc.mask;
+	do_div(maxsec, freq);
+	/* limit conversation rate to 10 sec as higher values will produce
+	 * too small mult factors and so reduce the conversion accuracy
+	 */
+	if (maxsec > 10)
+		maxsec = 10;
+
+	if (cpts->cc_mult || cpts->cc.shift)
+		return;
+
+	clocks_calc_mult_shift(&mult, &shift, freq, NSEC_PER_SEC, maxsec);
+
+	cpts->cc_mult = mult;
+	cpts->cc.mult = mult;
+	cpts->cc.shift = shift;
+
+	frac = 0;
+	ns = cyclecounter_cyc2ns(&cpts->cc, freq, cpts->cc.mask, &frac);
+
+	dev_info(cpts->dev,
+		 "CPTS: ref_clk_freq:%u calc_mult:%u calc_shift:%u error:%lld nsec/sec\n",
+		 freq, cpts->cc_mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
+}
+
 static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
 {
 	int ret = -EINVAL;
 	u32 prop;
 
-	if (of_property_read_u32(node, "cpts_clock_mult", &prop))
-		goto  of_error;
 	/* save cc.mult original value as it can be modified
 	 * by cpts_ptp_adjfreq().
 	 */
-	cpts->cc_mult = prop;
+	cpts->cc_mult = 0;
+	if (!of_property_read_u32(node, "cpts_clock_mult", &prop))
+		cpts->cc_mult = prop;
+
+	cpts->cc.shift = 0;
+	if (!of_property_read_u32(node, "cpts_clock_shift", &prop))
+		cpts->cc.shift = prop;
 
-	if (of_property_read_u32(node, "cpts_clock_shift", &prop))
-		goto  of_error;
-	cpts->cc.shift = prop;
+	if ((cpts->cc_mult && !cpts->cc.shift) ||
+	    (!cpts->cc_mult && cpts->cc.shift))
+		goto of_error;
 
 	return 0;
 
@@ -463,6 +502,8 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	cpts->cc.mask = CLOCKSOURCE_MASK(32);
 	cpts->info = cpts_info;
 
+	cpts_calc_mult_shift(cpts);
+
 	return cpts;
 }
 EXPORT_SYMBOL_GPL(cpts_create);
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 10/13] net: ethernet: ti: cpts: move dt props parsing to cpts driver
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA, Mugunthan V N,
	Richard Cochran
  Cc: Sekhar Nori, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-omap-u79uwXL29TY76Z2rM5mHXA, Rob Herring,
	devicetree-u79uwXL29TY76Z2rM5mHXA, Murali Karicheri, Wingman Kwok,
	Thomas Gleixner, Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko-l0cyMroinI0@public.gmane.org>

Move DT properties parsing into CPTS driver to simplify CPSW
code and CPTS driver porting on other SoC in the future
(like Keystone 2) - with this change it will not be required
to add the same DT parsing code in Keystone 2 NETCP driver.

Signed-off-by: Grygorii Strashko <grygorii.strashko-l0cyMroinI0@public.gmane.org>
---
 drivers/net/ethernet/ti/cpsw.c | 16 +---------------
 drivers/net/ethernet/ti/cpsw.h |  2 --
 drivers/net/ethernet/ti/cpts.c | 32 +++++++++++++++++++++++++++++---
 drivers/net/ethernet/ti/cpts.h |  5 +++--
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 6c28ef1..ae1ec6a 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -2312,18 +2312,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->active_slave = prop;
 
-	if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
-		dev_err(&pdev->dev, "Missing cpts_clock_mult property in the DT.\n");
-		return -EINVAL;
-	}
-	data->cpts_clock_mult = prop;
-
-	if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
-		dev_err(&pdev->dev, "Missing cpts_clock_shift property in the DT.\n");
-		return -EINVAL;
-	}
-	data->cpts_clock_shift = prop;
-
 	data->slave_data = devm_kzalloc(&pdev->dev, data->slaves
 					* sizeof(struct cpsw_slave_data),
 					GFP_KERNEL);
@@ -2789,9 +2777,7 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
-	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
-				 cpsw->data.cpts_clock_mult,
-				 cpsw->data.cpts_clock_shift);
+	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
 	if (IS_ERR(cpsw->cpts)) {
 		ret = PTR_ERR(cpsw->cpts);
 		goto clean_ale_ret;
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 16b54c6..6c3037a 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -31,8 +31,6 @@ struct cpsw_platform_data {
 	u32	channels;	/* number of cpdma channels (symmetric) */
 	u32	slaves;		/* number of slave cpgmac ports */
 	u32	active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
-	u32	cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
-	u32	cpts_clock_shift; /* convert input clock ticks to nanoseconds */
 	u32	ale_entries;	/* ale table size */
 	u32	bd_ram_size;  /*buffer descriptor ram size */
 	u32	mac_control;	/* Mac control register */
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 47831b2..5d5c46d 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -409,10 +409,34 @@ void cpts_unregister(struct cpts *cpts)
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
+{
+	int ret = -EINVAL;
+	u32 prop;
+
+	if (of_property_read_u32(node, "cpts_clock_mult", &prop))
+		goto  of_error;
+	/* save cc.mult original value as it can be modified
+	 * by cpts_ptp_adjfreq().
+	 */
+	cpts->cc_mult = prop;
+
+	if (of_property_read_u32(node, "cpts_clock_shift", &prop))
+		goto  of_error;
+	cpts->cc.shift = prop;
+
+	return 0;
+
+of_error:
+	dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
+	return ret;
+}
+
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 u32 mult, u32 shift)
+			 struct device_node *node)
 {
 	struct cpts *cpts;
+	int ret;
 
 	cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
 	if (!cpts)
@@ -423,6 +447,10 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	spin_lock_init(&cpts->lock);
 	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
 
+	ret = cpts_of_parse(cpts, node);
+	if (ret)
+		return ERR_PTR(ret);
+
 	cpts->refclk = devm_clk_get(dev, "cpts");
 	if (IS_ERR(cpts->refclk)) {
 		dev_err(dev, "Failed to get cpts refclk\n");
@@ -433,8 +461,6 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 
 	cpts->cc.read = cpts_systim_read;
 	cpts->cc.mask = CLOCKSOURCE_MASK(32);
-	cpts->cc.shift = shift;
-	cpts->cc_mult = mult;
 	cpts->info = cpts_info;
 
 	return cpts;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index e7d857c..5da23af 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -27,6 +27,7 @@
 #include <linux/clocksource.h>
 #include <linux/device.h>
 #include <linux/list.h>
+#include <linux/of.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
 #include <linux/timecounter.h>
@@ -133,7 +134,7 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct cpts *cpts);
 void cpts_unregister(struct cpts *cpts);
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 u32 mult, u32 shift);
+			 struct device_node *node);
 void cpts_release(struct cpts *cpts);
 
 static inline void cpts_rx_enable(struct cpts *cpts, int enable)
@@ -168,7 +169,7 @@ static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 
 static inline
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 u32 mult, u32 shift)
+			 struct device_node *node)
 {
 	return NULL;
 }
-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 09/13] net: ethernet: ti: cpts: rework initialization/deinitialization
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

The current implementation CPTS initialization and deinitialization
(represented by cpts_register/unregister()) does too many static
initialization from .ndo_open(), which is reasonable to do once at probe
time instead, and also require caller to allocate memory for struct cpts,
which is internal for CPTS driver in general.

This patch splits CPTS initialization and deinitialization on two parts:

- static initializtion cpts_create()/cpts_release() which expected to be
executed when parent driver is probed/removed;

- dynamic part cpts_register/unregister() which expected to be executed
when network device is opened/closed.

As result, current code of CPTS parent driver - CPSW - will be simplified
(and it also will allow simplify adding support for Keystone 2 devices in
the future), plus more initialization errors will be catched earlier. In
addition, this change allows to clean up cpts.h for the case when CPTS is
disabled.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpsw.c |  24 +++++-----
 drivers/net/ethernet/ti/cpts.c | 102 ++++++++++++++++++++++++-----------------
 drivers/net/ethernet/ti/cpts.h |  26 +++++++++--
 3 files changed, 95 insertions(+), 57 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index a6a93ad..6c28ef1 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1406,9 +1406,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		if (ret < 0)
 			goto err_cleanup;
 
-		if (cpts_register(cpsw->dev, cpsw->cpts,
-				  cpsw->data.cpts_clock_mult,
-				  cpsw->data.cpts_clock_shift))
+		if (cpts_register(cpsw->cpts))
 			dev_err(priv->dev, "error registering cpts device\n");
 
 	}
@@ -2596,6 +2594,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	struct cpdma_params		dma_params;
 	struct cpsw_ale_params		ale_params;
 	void __iomem			*ss_regs;
+	void __iomem			*cpts_regs;
 	struct resource			*res, *ss_res;
 	const struct of_device_id	*of_id;
 	struct gpio_descs		*mode;
@@ -2623,12 +2622,6 @@ static int cpsw_probe(struct platform_device *pdev)
 	priv->dev  = &ndev->dev;
 	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
 	cpsw->rx_packet_max = max(rx_packet_max, 128);
-	cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-	if (!cpsw->cpts) {
-		dev_err(&pdev->dev, "error allocating cpts\n");
-		ret = -ENOMEM;
-		goto clean_ndev_ret;
-	}
 
 	mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
 	if (IS_ERR(mode)) {
@@ -2716,7 +2709,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
 		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-		cpsw->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
+		cpts_regs		= ss_regs + CPSW1_CPTS_OFFSET;
 		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
@@ -2730,7 +2723,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	case CPSW_VERSION_3:
 	case CPSW_VERSION_4:
 		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-		cpsw->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
+		cpts_regs		= ss_regs + CPSW2_CPTS_OFFSET;
 		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
@@ -2796,6 +2789,14 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
+	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs,
+				 cpsw->data.cpts_clock_mult,
+				 cpsw->data.cpts_clock_shift);
+	if (IS_ERR(cpsw->cpts)) {
+		ret = PTR_ERR(cpsw->cpts);
+		goto clean_ale_ret;
+	}
+
 	ndev->irq = platform_get_irq(pdev, 1);
 	if (ndev->irq < 0) {
 		dev_err(priv->dev, "error getting irq resource\n");
@@ -2911,6 +2912,7 @@ static int cpsw_remove(struct platform_device *pdev)
 		unregister_netdev(cpsw->slaves[1].ndev);
 	unregister_netdev(ndev);
 
+	cpts_release(cpsw->cpts);
 	cpsw_ale_destroy(cpsw->ale);
 	cpdma_ctlr_destroy(cpsw->dma);
 	cpsw_remove_dt(pdev);
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index a662c33..47831b2 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -248,24 +248,6 @@ static void cpts_overflow_check(struct work_struct *work)
 	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 }
 
-static void cpts_clk_init(struct device *dev, struct cpts *cpts)
-{
-	if (!cpts->refclk) {
-		cpts->refclk = devm_clk_get(dev, "cpts");
-		if (IS_ERR(cpts->refclk)) {
-			dev_err(dev, "Failed to get cpts refclk\n");
-			cpts->refclk = NULL;
-			return;
-		}
-	}
-	clk_prepare_enable(cpts->refclk);
-}
-
-static void cpts_clk_release(struct cpts *cpts)
-{
-	clk_disable_unprepare(cpts->refclk);
-}
-
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
 		      u16 ts_seqid, u8 ts_msgtype)
 {
@@ -372,34 +354,27 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(cpts_tx_timestamp);
 
-int cpts_register(struct device *dev, struct cpts *cpts,
-		  u32 mult, u32 shift)
+int cpts_register(struct cpts *cpts)
 {
 	int err, i;
 
-	cpts->info = cpts_info;
-	spin_lock_init(&cpts->lock);
-
-	cpts->cc.read = cpts_systim_read;
-	cpts->cc.mask = CLOCKSOURCE_MASK(32);
-	cpts->cc_mult = mult;
-	cpts->cc.mult = mult;
-	cpts->cc.shift = shift;
-
 	INIT_LIST_HEAD(&cpts->events);
 	INIT_LIST_HEAD(&cpts->pool);
 	for (i = 0; i < CPTS_MAX_EVENTS; i++)
 		list_add(&cpts->pool_data[i].list, &cpts->pool);
 
-	cpts_clk_init(dev, cpts);
+	clk_enable(cpts->refclk);
+
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
 
+	/* reinitialize cc.mult to original value as it can be modified
+	 * by cpts_ptp_adjfreq().
+	 */
+	cpts->cc.mult = cpts->cc_mult;
 	timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
 
-	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
-
-	cpts->clock = ptp_clock_register(&cpts->info, dev);
+	cpts->clock = ptp_clock_register(&cpts->info, cpts->dev);
 	if (IS_ERR(cpts->clock)) {
 		err = PTR_ERR(cpts->clock);
 		cpts->clock = NULL;
@@ -412,27 +387,72 @@ int cpts_register(struct device *dev, struct cpts *cpts,
 	return 0;
 
 err_ptp:
-	if (cpts->refclk)
-		cpts_clk_release(cpts);
+	clk_disable(cpts->refclk);
 	return err;
 }
 EXPORT_SYMBOL_GPL(cpts_register);
 
 void cpts_unregister(struct cpts *cpts)
 {
-	if (cpts->clock) {
-		ptp_clock_unregister(cpts->clock);
-		cancel_delayed_work_sync(&cpts->overflow_work);
-	}
+	if (WARN_ON(!cpts->clock))
+		return;
+
+	cancel_delayed_work_sync(&cpts->overflow_work);
+
+	ptp_clock_unregister(cpts->clock);
+	cpts->clock = NULL;
 
 	cpts_write32(cpts, 0, int_enable);
 	cpts_write32(cpts, 0, control);
 
-	if (cpts->refclk)
-		cpts_clk_release(cpts);
+	clk_disable(cpts->refclk);
 }
 EXPORT_SYMBOL_GPL(cpts_unregister);
 
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 u32 mult, u32 shift)
+{
+	struct cpts *cpts;
+
+	cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
+	if (!cpts)
+		return ERR_PTR(-ENOMEM);
+
+	cpts->dev = dev;
+	cpts->reg = (struct cpsw_cpts __iomem *)regs;
+	spin_lock_init(&cpts->lock);
+	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
+
+	cpts->refclk = devm_clk_get(dev, "cpts");
+	if (IS_ERR(cpts->refclk)) {
+		dev_err(dev, "Failed to get cpts refclk\n");
+		return ERR_PTR(PTR_ERR(cpts->refclk));
+	}
+
+	clk_prepare(cpts->refclk);
+
+	cpts->cc.read = cpts_systim_read;
+	cpts->cc.mask = CLOCKSOURCE_MASK(32);
+	cpts->cc.shift = shift;
+	cpts->cc_mult = mult;
+	cpts->info = cpts_info;
+
+	return cpts;
+}
+EXPORT_SYMBOL_GPL(cpts_create);
+
+void cpts_release(struct cpts *cpts)
+{
+	if (!cpts)
+		return;
+
+	if (WARN_ON(!cpts->clock))
+		return;
+
+	clk_unprepare(cpts->refclk);
+}
+EXPORT_SYMBOL_GPL(cpts_release);
+
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("TI CPTS driver");
 MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 29a1e80c..e7d857c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -20,6 +20,8 @@
 #ifndef _TI_CPTS_H_
 #define _TI_CPTS_H_
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
+
 #include <linux/clk.h>
 #include <linux/clkdev.h>
 #include <linux/clocksource.h>
@@ -108,10 +110,10 @@ struct cpts_event {
 };
 
 struct cpts {
+	struct device *dev;
 	struct cpsw_cpts __iomem *reg;
 	int tx_enable;
 	int rx_enable;
-#if IS_ENABLED(CONFIG_TI_CPTS)
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
 	spinlock_t lock; /* protects time registers */
@@ -124,14 +126,15 @@ struct cpts {
 	struct list_head events;
 	struct list_head pool;
 	struct cpts_event pool_data[CPTS_MAX_EVENTS];
-#endif
 };
 
-#if IS_ENABLED(CONFIG_TI_CPTS)
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
+int cpts_register(struct cpts *cpts);
 void cpts_unregister(struct cpts *cpts);
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 u32 mult, u32 shift);
+void cpts_release(struct cpts *cpts);
 
 static inline void cpts_rx_enable(struct cpts *cpts, int enable)
 {
@@ -154,6 +157,8 @@ static inline bool cpts_is_tx_enabled(struct cpts *cpts)
 }
 
 #else
+struct cpts;
+
 static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 }
@@ -161,8 +166,19 @@ static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 }
 
+static inline
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 u32 mult, u32 shift)
+{
+	return NULL;
+}
+
+static inline void cpts_release(struct cpts *cpts)
+{
+}
+
 static inline int
-cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift)
+cpts_register(struct cpts *cpts)
 {
 	return 0;
 }
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 08/13] net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

CPTS module and IRQs are always enabled when CPTS is registered,
before starting overflow check work, and disabled during
deregistration, when overflow check work has been canceled already.
So, It doesn't require to (re)enable CPTS module and IRQs in
cpts_overflow_check().

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8266459..a662c33 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -243,8 +243,6 @@ static void cpts_overflow_check(struct work_struct *work)
 	struct timespec64 ts;
 	struct cpts *cpts = container_of(work, struct cpts, overflow_work.work);
 
-	cpts_write32(cpts, CPTS_EN, control);
-	cpts_write32(cpts, TS_PEND_EN, int_enable);
 	cpts_ptp_gettime(&cpts->info, &ts);
 	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
 	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 07/13] net: ethernet: ti: cpts: clean up event list if event pool is empty
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

From: WingMan Kwok <w-kwok2@ti.com>

When a CPTS user does not exit gracefully by disabling cpts
timestamping and leaving a joined multicast group, the system
continues to receive and timestamps the ptp packets which eventually
occupy all the event list entries.  When this happns, the added code
tries to remove some list entries which are expired.

Signed-off-by: WingMan Kwok <w-kwok2@ti.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index d3c1ac5..8266459 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -57,6 +57,26 @@ static int cpts_fifo_pop(struct cpts *cpts, u32 *high, u32 *low)
 	return -1;
 }
 
+static int cpts_purge_events(struct cpts *cpts)
+{
+	struct list_head *this, *next;
+	struct cpts_event *event;
+	int removed = 0;
+
+	list_for_each_safe(this, next, &cpts->events) {
+		event = list_entry(this, struct cpts_event, list);
+		if (event_expired(event)) {
+			list_del_init(&event->list);
+			list_add(&event->list, &cpts->pool);
+			++removed;
+		}
+	}
+
+	if (removed)
+		dev_dbg(cpts->dev, "cpts: event pool cleaned up %d\n", removed);
+	return removed ? 0 : -1;
+}
+
 /*
  * Returns zero if matching event type was found.
  */
@@ -69,10 +89,12 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 	for (i = 0; i < CPTS_FIFO_DEPTH; i++) {
 		if (cpts_fifo_pop(cpts, &hi, &lo))
 			break;
-		if (list_empty(&cpts->pool)) {
-			pr_err("cpts: event pool is empty\n");
+
+		if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
+			dev_err(cpts->dev, "cpts: event pool empty\n");
 			return -1;
 		}
+
 		event = list_first_entry(&cpts->pool, struct cpts_event, list);
 		event->tmo = jiffies + 2;
 		event->high = hi;
-- 
2.10.1

^ permalink raw reply related

* Re: [PATCH ethtool v5 0/2]  Adding downshift support to ethtool
From: John W. Linville @ 2016-12-02 20:24 UTC (permalink / raw)
  To: Allan W. Nielsen; +Cc: netdev, andrew, f.fainelli, raju.lakkaraju
In-Reply-To: <1479977811-5603-1-git-send-email-allan.nielsen@microsemi.com>

On Thu, Nov 24, 2016 at 09:56:49AM +0100, Allan W. Nielsen wrote:
> (downshift feature is applied in the net-next tree - d3c19c0a72)
> 
> This series adds support for downshift (using phy-tunables).
> 
> Downshifting can either be turned on/off, or it can be configured to a
> specifc count. "count" is optional.
> 
> Change set:
> v1:
> - Initial version of set/get phy tunable with downshift feature.
> v2:
> - (ethtool) Syntax is changed from "--set-phy-tunable downshift on|off|%d"
>   to "--set-phy-tunable [downshift on|off [count N]]" - as requested by
>   Andrew.
> v3:
> - Fixed Spelling in "ethtool-copy.h:sync with net" 
> - Fixed "if send_ioctl() returns an error, print the error message and then
>   still print th value of count".
> v4:
> - Fixing spelling in the example included in the commit message
> - Improve the description in the man-page
> v5:
> - re-sync ethtool.h from the net-next tree.
> 
> 
> Allan W. Nielsen (1):
>   ethtool-copy.h:sync with net-next
> 
> Raju Lakkaraju (1):
>   Ethtool: Implements ETHTOOL_PHY_GTUNABLE/ETHTOOL_PHY_STUNABLE and PHY
>     downshift
> 
>  ethtool-copy.h |  21 +++++++--
>  ethtool.8.in   |  40 ++++++++++++++++
>  ethtool.c      | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 202 insertions(+), 3 deletions(-)
> 
> -- 
> 2.7.3

Merged.

-- 
John W. Linville		Someday the world will need a hero, and you
linville@tuxdriver.com			might be all we have.  Be ready.

^ permalink raw reply

* [PATCH v3 05/13] net: ethernet: ti: cpts: fix registration order
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

The ptp clock registered before spinlock, which is protecting it, and
before timecounter and cyclecounter initialization in cpts_register().

So, ensure that ptp clock is registered the last, after everything
else is done.

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 61198f1..3dda6d5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -356,15 +356,8 @@ int cpts_register(struct device *dev, struct cpts *cpts,
 		  u32 mult, u32 shift)
 {
 	int err, i;
-	unsigned long flags;
 
 	cpts->info = cpts_info;
-	cpts->clock = ptp_clock_register(&cpts->info, dev);
-	if (IS_ERR(cpts->clock)) {
-		err = PTR_ERR(cpts->clock);
-		cpts->clock = NULL;
-		return err;
-	}
 	spin_lock_init(&cpts->lock);
 
 	cpts->cc.read = cpts_systim_read;
@@ -382,15 +375,26 @@ int cpts_register(struct device *dev, struct cpts *cpts,
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
 
-	spin_lock_irqsave(&cpts->lock, flags);
 	timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
-	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
-	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 
+	cpts->clock = ptp_clock_register(&cpts->info, dev);
+	if (IS_ERR(cpts->clock)) {
+		err = PTR_ERR(cpts->clock);
+		cpts->clock = NULL;
+		goto err_ptp;
+	}
 	cpts->phc_index = ptp_clock_index(cpts->clock);
+
+	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
+
 	return 0;
+
+err_ptp:
+	if (cpts->refclk)
+		cpts_clk_release(cpts);
+	return err;
 }
 EXPORT_SYMBOL_GPL(cpts_register);
 
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 04/13] net: ethernet: ti: cpts: fix unbalanced clk api usage in cpts_register/unregister
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA, Mugunthan V N,
	Richard Cochran
  Cc: Sekhar Nori, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-omap-u79uwXL29TY76Z2rM5mHXA, Rob Herring,
	devicetree-u79uwXL29TY76Z2rM5mHXA, Murali Karicheri, Wingman Kwok,
	Thomas Gleixner, Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko-l0cyMroinI0@public.gmane.org>

There are two issues with TI CPTS code which are reproducible when TI
CPSW ethX device passes few up/down iterations:
- cpts refclk prepare counter continuously incremented after each
up/down iteration;
- devm_clk_get(dev, "cpts") is called many times.

Hence, fix these issues by using clk_disable_unprepare() in
cpts_clk_release() and skipping devm_clk_get() if cpts refclk has been
acquired already.

Acked-by: Richard Cochran <richardcochran-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>
Signed-off-by: Grygorii Strashko <grygorii.strashko-l0cyMroinI0@public.gmane.org>
---
 drivers/net/ethernet/ti/cpts.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8cb0369..61198f1 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -230,18 +230,20 @@ static void cpts_overflow_check(struct work_struct *work)
 
 static void cpts_clk_init(struct device *dev, struct cpts *cpts)
 {
-	cpts->refclk = devm_clk_get(dev, "cpts");
-	if (IS_ERR(cpts->refclk)) {
-		dev_err(dev, "Failed to get cpts refclk\n");
-		cpts->refclk = NULL;
-		return;
+	if (!cpts->refclk) {
+		cpts->refclk = devm_clk_get(dev, "cpts");
+		if (IS_ERR(cpts->refclk)) {
+			dev_err(dev, "Failed to get cpts refclk\n");
+			cpts->refclk = NULL;
+			return;
+		}
 	}
 	clk_prepare_enable(cpts->refclk);
 }
 
 static void cpts_clk_release(struct cpts *cpts)
 {
-	clk_disable(cpts->refclk);
+	clk_disable_unprepare(cpts->refclk);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 03/13] net: ethernet: ti: cpsw: minimize direct access to struct cpts
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

This will provide more flexibility in changing CPTS internals and also
required for further changes.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpsw.c | 28 +++++++++++++++-------------
 drivers/net/ethernet/ti/cpts.h | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index f65a4e8..a6a93ad 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1481,7 +1481,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-				cpsw->cpts->tx_enable)
+	    cpts_is_tx_enabled(cpsw->cpts))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	skb_tx_timestamp(skb);
@@ -1520,7 +1520,8 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 	struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
 	u32 ts_en, seq_id;
 
-	if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
+	if (!cpts_is_tx_enabled(cpsw->cpts) &&
+	    !cpts_is_rx_enabled(cpsw->cpts)) {
 		slave_write(slave, 0, CPSW1_TS_CTL);
 		return;
 	}
@@ -1528,10 +1529,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 	seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
 	ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-	if (cpsw->cpts->tx_enable)
+	if (cpts_is_tx_enabled(cpsw->cpts))
 		ts_en |= CPSW_V1_TS_TX_EN;
 
-	if (cpsw->cpts->rx_enable)
+	if (cpts_is_rx_enabled(cpsw->cpts))
 		ts_en |= CPSW_V1_TS_RX_EN;
 
 	slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1554,20 +1555,20 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 	case CPSW_VERSION_2:
 		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-		if (cpsw->cpts->tx_enable)
+		if (cpts_is_tx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V2_TX_TS_BITS;
 
-		if (cpsw->cpts->rx_enable)
+		if (cpts_is_rx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V2_RX_TS_BITS;
 		break;
 	case CPSW_VERSION_3:
 	default:
 		ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-		if (cpsw->cpts->tx_enable)
+		if (cpts_is_tx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V3_TX_TS_BITS;
 
-		if (cpsw->cpts->rx_enable)
+		if (cpts_is_rx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V3_RX_TS_BITS;
 		break;
 	}
@@ -1603,7 +1604,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 
 	switch (cfg.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		cpts->rx_enable = 0;
+		cpts_rx_enable(cpts, 0);
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -1619,14 +1620,14 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		cpts->rx_enable = 1;
+		cpts_rx_enable(cpts, 1);
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		break;
 	default:
 		return -ERANGE;
 	}
 
-	cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
+	cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
 
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
@@ -1655,8 +1656,9 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
-	cfg.tx_type = cpts->tx_enable ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-	cfg.rx_filter = (cpts->rx_enable ?
+	cfg.tx_type = cpts_is_tx_enabled(cpts) ?
+		      HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
 			 HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE);
 
 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 416ba2c..29a1e80c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -132,6 +132,27 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
 void cpts_unregister(struct cpts *cpts);
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+	cpts->rx_enable = enable;
+}
+
+static inline bool cpts_is_rx_enabled(struct cpts *cpts)
+{
+	return !!cpts->rx_enable;
+}
+
+static inline void cpts_tx_enable(struct cpts *cpts, int enable)
+{
+	cpts->tx_enable = enable;
+}
+
+static inline bool cpts_is_tx_enabled(struct cpts *cpts)
+{
+	return !!cpts->tx_enable;
+}
+
 #else
 static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
@@ -149,6 +170,24 @@ cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift)
 static inline void cpts_unregister(struct cpts *cpts)
 {
 }
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+}
+
+static inline bool cpts_is_rx_enabled(struct cpts *cpts)
+{
+	return false;
+}
+
+static inline void cpts_tx_enable(struct cpts *cpts, int enable)
+{
+}
+
+static inline bool cpts_is_tx_enabled(struct cpts *cpts)
+{
+	return false;
+}
 #endif
 
 
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 02/13] net: ethernet: ti: allow cpts to be built separately
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

TI CPTS IP is used as part of TI OMAP CPSW driver, but it's also
present as part of NETCP on TI Keystone 2 SoCs. So, It's required
to enable build of CPTS for both this drivers and this can be
achieved by allowing CPTS to be built separately.

Hence, allow cpts to be built separately and convert it to be
a module as both CPSW and NETCP drives can be built as modules.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/Kconfig  |  2 +-
 drivers/net/ethernet/ti/Makefile |  3 ++-
 drivers/net/ethernet/ti/cpsw.c   | 22 +++++++++++++++++-----
 drivers/net/ethernet/ti/cpts.c   | 16 ++++++++--------
 drivers/net/ethernet/ti/cpts.h   | 18 ++++++++++++++----
 5 files changed, 42 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9904d74..ff7f518 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,7 +74,7 @@ config TI_CPSW
 	  will be called cpsw.
 
 config TI_CPTS
-	bool "TI Common Platform Time Sync (CPTS) Support"
+	tristate "TI Common Platform Time Sync (CPTS) Support"
 	depends on TI_CPSW
 	select PTP_1588_CLOCK
 	---help---
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index d420d94..1e7c10b 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,8 +12,9 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
+obj-$(CONFIG_TI_CPTS) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
-ti_cpsw-y := cpsw.o cpts.o
+ti_cpsw-y := cpsw.o
 
 obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o
 keystone_netcp-y := netcp_core.o
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 58947aa..f65a4e8 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1513,7 +1513,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_BUSY;
 }
 
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 
 static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
@@ -1661,7 +1661,16 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 
 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
+#else
+static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
 
+static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
 #endif /*CONFIG_TI_CPTS*/
 
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
@@ -1674,12 +1683,10 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 		return -EINVAL;
 
 	switch (cmd) {
-#ifdef CONFIG_TI_CPTS
 	case SIOCSHWTSTAMP:
 		return cpsw_hwtstamp_set(dev, req);
 	case SIOCGHWTSTAMP:
 		return cpsw_hwtstamp_get(dev, req);
-#endif
 	}
 
 	if (!cpsw->slaves[slave_no].phy)
@@ -1935,10 +1942,10 @@ static void cpsw_set_msglevel(struct net_device *ndev, u32 value)
 	priv->msg_enable = value;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
 static int cpsw_get_ts_info(struct net_device *ndev,
 			    struct ethtool_ts_info *info)
 {
-#ifdef CONFIG_TI_CPTS
 	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	info->so_timestamping =
@@ -1955,7 +1962,12 @@ static int cpsw_get_ts_info(struct net_device *ndev,
 	info->rx_filters =
 		(1 << HWTSTAMP_FILTER_NONE) |
 		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+	return 0;
+}
 #else
+static int cpsw_get_ts_info(struct net_device *ndev,
+			    struct ethtool_ts_info *info)
+{
 	info->so_timestamping =
 		SOF_TIMESTAMPING_TX_SOFTWARE |
 		SOF_TIMESTAMPING_RX_SOFTWARE |
@@ -1963,9 +1975,9 @@ static int cpsw_get_ts_info(struct net_device *ndev,
 	info->phc_index = -1;
 	info->tx_types = 0;
 	info->rx_filters = 0;
-#endif
 	return 0;
 }
+#endif
 
 static int cpsw_get_settings(struct net_device *ndev,
 			     struct ethtool_cmd *ecmd)
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index a42c449..8cb0369 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,8 +31,6 @@
 
 #include "cpts.h"
 
-#ifdef CONFIG_TI_CPTS
-
 #define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
@@ -334,6 +332,7 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	memset(ssh, 0, sizeof(*ssh));
 	ssh->hwtstamp = ns_to_ktime(ns);
 }
+EXPORT_SYMBOL_GPL(cpts_rx_timestamp);
 
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
@@ -349,13 +348,11 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	ssh.hwtstamp = ns_to_ktime(ns);
 	skb_tstamp_tx(skb, &ssh);
 }
-
-#endif /*CONFIG_TI_CPTS*/
+EXPORT_SYMBOL_GPL(cpts_tx_timestamp);
 
 int cpts_register(struct device *dev, struct cpts *cpts,
 		  u32 mult, u32 shift)
 {
-#ifdef CONFIG_TI_CPTS
 	int err, i;
 	unsigned long flags;
 
@@ -391,18 +388,21 @@ int cpts_register(struct device *dev, struct cpts *cpts,
 	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 
 	cpts->phc_index = ptp_clock_index(cpts->clock);
-#endif
 	return 0;
 }
+EXPORT_SYMBOL_GPL(cpts_register);
 
 void cpts_unregister(struct cpts *cpts)
 {
-#ifdef CONFIG_TI_CPTS
 	if (cpts->clock) {
 		ptp_clock_unregister(cpts->clock);
 		cancel_delayed_work_sync(&cpts->overflow_work);
 	}
 	if (cpts->refclk)
 		cpts_clk_release(cpts);
-#endif
 }
+EXPORT_SYMBOL_GPL(cpts_unregister);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI CPTS driver");
+MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 69a46b9..416ba2c 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -111,7 +111,7 @@ struct cpts {
 	struct cpsw_cpts __iomem *reg;
 	int tx_enable;
 	int rx_enable;
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
 	spinlock_t lock; /* protects time registers */
@@ -127,9 +127,11 @@ struct cpts {
 #endif
 };
 
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
+int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
+void cpts_unregister(struct cpts *cpts);
 #else
 static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
@@ -137,9 +139,17 @@ static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 }
+
+static inline int
+cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift)
+{
+	return 0;
+}
+
+static inline void cpts_unregister(struct cpts *cpts)
+{
+}
 #endif
 
-int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
-void cpts_unregister(struct cpts *cpts);
 
 #endif
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 01/13] net: ethernet: ti: cpts: switch to readl/writel_relaxed()
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev, Mugunthan V N, Richard Cochran
  Cc: Sekhar Nori, linux-kernel, linux-omap, Rob Herring, devicetree,
	Murali Karicheri, Wingman Kwok, Thomas Gleixner,
	Grygorii Strashko
In-Reply-To: <20161202203023.25526-1-grygorii.strashko@ti.com>

Switch to readl/writel_relaxed() APIs, because this is recommended
API and the CPTS IP is reused on Keystone 2 SoCs
where LE/BE modes are supported.

Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
---
 drivers/net/ethernet/ti/cpts.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 85a55b4..a42c449 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -33,8 +33,8 @@
 
 #ifdef CONFIG_TI_CPTS
 
-#define cpts_read32(c, r)	__raw_readl(&c->reg->r)
-#define cpts_write32(c, v, r)	__raw_writel(v, &c->reg->r)
+#define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
+#define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
 static int event_expired(struct cpts_event *event)
 {
-- 
2.10.1

^ permalink raw reply related

* [PATCH v3 00/13] net: ethernet: ti: cpts: update and fixes
From: Grygorii Strashko @ 2016-12-02 20:30 UTC (permalink / raw)
  To: David S. Miller, netdev-u79uwXL29TY76Z2rM5mHXA, Mugunthan V N,
	Richard Cochran
  Cc: Sekhar Nori, linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-omap-u79uwXL29TY76Z2rM5mHXA, Rob Herring,
	devicetree-u79uwXL29TY76Z2rM5mHXA, Murali Karicheri, Wingman Kwok,
	Thomas Gleixner, Grygorii Strashko

It is preparation series intended to clean up and optimize TI CPTS driver to
facilitate further integration with other TI's SoCs like Keystone 2.

Changes in v3:
- patches reordered: fixes and small updates moved first
- added comments in code about cpts->cc_mult
- conversation range (maxsec) limited to 10sec

Changes in v2:
- patch "net: ethernet: ti: cpts: rework initialization/deinitialization"
  was split on 4 patches
- applied comments from Richard Cochran
- dropped patch
  "net: ethernet: ti: cpts: add return value to tx and rx timestamp funcitons"
- new patches added:
  "net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs"
  and "clocksource: export the clocks_calc_mult_shift to use by timestamp code"

Links on prev versions:
v2: http://www.mail-archive.com/linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org/msg1282034.html
v1: http://www.spinics.net/lists/linux-omap/msg131925.html

Grygorii Strashko (11):
  net: ethernet: ti: cpts: switch to readl/writel_relaxed()
  net: ethernet: ti: allow cpts to be built separately
  net: ethernet: ti: cpsw: minimize direct access to struct cpts
  net: ethernet: ti: cpts: fix unbalanced clk api usage in cpts_register/unregister
  net: ethernet: ti: cpts: fix registration order
  net: ethernet: ti: cpts: disable cpts when unregistered
  net: ethernet: ti: cpts: drop excessive writes to CTRL and INT_EN regs
  net: ethernet: ti: cpts: rework initialization/deinitialization
  net: ethernet: ti: cpts: move dt props parsing to cpts driver
  net: ethernet: ti: cpts: calc mult and shift from refclk freq
  net: ethernet: ti: cpts: fix overflow check period

Murali Karicheri (1):
  clocksource: export the clocks_calc_mult_shift to use by timestamp code

WingMan Kwok (1):
  net: ethernet: ti: cpts: clean up event list if event pool is empty

 Documentation/devicetree/bindings/net/cpsw.txt |   8 +-
 drivers/net/ethernet/ti/Kconfig                |   2 +-
 drivers/net/ethernet/ti/Makefile               |   3 +-
 drivers/net/ethernet/ti/cpsw.c                 |  84 ++++-----
 drivers/net/ethernet/ti/cpsw.h                 |   2 -
 drivers/net/ethernet/ti/cpts.c                 | 239 +++++++++++++++++++------
 drivers/net/ethernet/ti/cpts.h                 |  80 ++++++++-
 kernel/time/clocksource.c                      |   1 +
 8 files changed, 304 insertions(+), 115 deletions(-)

-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [flamebait] xdp, well meaning but pointless
From: Tom Herbert @ 2016-12-02 20:19 UTC (permalink / raw)
  To: Stephen Hemminger
  Cc: Hannes Frederic Sowa, Jesper Dangaard Brouer, Thomas Graf,
	Florian Westphal, Linux Kernel Network Developers
In-Reply-To: <20161202115644.5ff2e408@xeon-e3>

On Fri, Dec 2, 2016 at 11:56 AM, Stephen Hemminger
<stephen@networkplumber.org> wrote:
> On Fri, 2 Dec 2016 19:12:00 +0100
> Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:
>
>> On 02.12.2016 17:59, Tom Herbert wrote:
>> > On Fri, Dec 2, 2016 at 3:54 AM, Hannes Frederic Sowa
>> > <hannes@stressinduktion.org> wrote:
>> >> On 02.12.2016 11:24, Jesper Dangaard Brouer wrote:
>> >>> On Thu, 1 Dec 2016 13:51:32 -0800
>> >>> Tom Herbert <tom@herbertland.com> wrote:
>> >>>
>> >>>>>> The technical plenary at last IETF on Seoul a couple of weeks ago was
>> >>>>>> exclusively focussed on DDOS in light of the recent attack against
>> >>>>>> Dyn. There were speakers form Cloudflare and Dyn. The Cloudflare
>> >>>>>> presentation by Nick Sullivan
>> >>>>>> (https://www.ietf.org/proceedings/97/slides/slides-97-ietf-sessb-how-to-stay-online-harsh-realities-of-operating-in-a-hostile-network-nick-sullivan-01.pdf)
>> >>>>>> alluded to some implementation of DDOS mitigation. In particular, on
>> >>>>>> slide 6 Nick gave some numbers for drop rates in DDOS. The "kernel"
>> >>>
>> >>> slide 14
>> >>>
>> >>>>>> numbers he gave we're based in iptables+BPF and that was a whole
>> >>>>>> 1.2Mpps-- somehow that seems ridiculously to me (I said so at the mic
>> >>>>>> and that's also when I introduced XDP to whole IETF :-) ). If that's
>> >>>>>> the best we can do the Internet is in a world hurt. DDOS mitigation
>> >>>>>> alone is probably a sufficient motivation to look at XDP. We need
>> >>>>>> something that drops bad packets as quickly as possible when under
>> >>>>>> attack, we need this to be integrated into the stack, we need it to be
>> >>>>>> programmable to deal with the increasing savvy of attackers, and we
>> >>>>>> don't want to be forced to be dependent on HW solutions. This is why
>> >>>>>> we created XDP!
>> >>>
>> >>> The 1.2Mpps number is a bit low, but we are unfortunately in that
>> >>> ballpark.
>> >>>
>> >>>>> I totally understand that. But in my reply to David in this thread I
>> >>>>> mentioned DNS apex processing as being problematic which is actually
>> >>>>> being referred in your linked slide deck on page 9 ("What do floods look
>> >>>>> like") and the problematic of parsing DNS packets in XDP due to string
>> >>>>> processing and looping inside eBPF.
>> >>>
>> >>> That is a weak argument. You do realize CloudFlare actually use eBPF to
>> >>> do this exact filtering, and (so-far) eBPF for parsing DNS have been
>> >>> sufficient for them.
>> >>
>> >> You are talking about this code on the following slides (I actually
>> >> transcribed it for you here and disassembled):
>> >>
>> >> l0:     ld #0x14
>> >> l1:     ldxb 4*([0]&0xf)
>> >> l2:     add x
>> >> l3:     tax
>> >> l4:     ld [x+0]
>> >> l5:     jeq #0x7657861, l6, l13
>> >> l6:     ld [x+4]
>> >> l7:     jeq #0x6d706c65, l8, l13
>> >> l8:     ld [x+8]
>> >> l9:     jeq #0x3636f6d, l10, l13
>> >> l10:    ldb [x+12]
>> >> l11:    jeq #0, l12, l13
>> >> l12:    ret #0x1
>> >> l13:    ret #0
>> >>
>> >> You can offload this to u32 in hardware if that is what you want.
>> >>
>> >> The reason this works is because of netfilter, which allows them to
>> >> dynamically generate BPF programs and insert and delete them from
>> >> chains, do intersection or unions of them.
>> >>
>> >> If you have a freestanding program like in XDP the complexity space is a
>> >> different one and not comparable to this at all.
>> >>
>> > I don't understand this comment about complexity especially in regards
>> > to the idea of offloading u32 to hardware. Relying on hardware to do
>> > anything always leads to more complexity than an equivalent SW
>> > implementation for the same functionality. The only reason we ever use
>> > a hardware mechanisms is if it gives *significantly* better
>> > performance. If the performance difference isn't there then doing
>> > things in SW is going to be the better path (as we see in XDP).
>>
>> I am just wondering why the u32 filter wasn't mentioned in their slide
>> deck. If all what Cloudflare needs are those kind of matches, they are
>> in fact actually easier to generate than an cBPF program. It is not a
>> good example of how a real world DoS filter in XDP would look like.
>>
>> If you argue XDP as a C function hook that can call arbitrary code in
>> the driver before submitting that to the networking stack, yep, that is
>> not complex at all. Depending on how those modules will be maintained,
>> they either end up in the kernel and will be updated on major changes or
>> are 3rd party and people have to update them and also depend on the
>> driver features.
>>
>> But this opens up a whole new can of worms also. I haven't really
>> thought this through completely, but last time the patches were nack'ed
>> with lots of strong opinions and I tended to agree with them. I am
>> revisiting this position.
>>
>> Certainly you can build real-world DoS protection with this function
>> pointer hook and C code in the driver. In this case a user space
>> solution still has advantages because of maintainability, as e.g. with
>> netmap or dpdk you are again decoupled from the in-kernel API/ABI and
>> don't need to test, recompile etc. on each kernel upgrade. If the module
>> ends up in the kernel, those problems might also disappear.
>>
>> For XDP+eBPF to provide a full DoS mitigation (protocol parsing,
>> sampling and dropping) solution seems to be too complex for me because
>> of the arguments I stated in my previous mail.
>
> I take a "horses for courses" attitude.
>  - XDP is better for providing high speed packet mangling. It is more
>    programmable and faster than existing TC, iptables, nftables, infrastructure.
>
>  - DPDK is better for implementing a networking infrastructure application.
>
> To give two examples.  Implementing something as complex as FD.io/VPP
> with XDP would be massive undertaking and not worth the effort. Likewise
> reimplementing the full Linux networking stack with all the work on
> congestion control, queue management and socket API's in DPDK would
> be waste of effort. That is not to say that someone won't try it,
> but it will create more bloat and bugs.
>
> Unfortunately, both camps seem to have a high NIMBY quotient and
> things are being developed for their own self interest. This is ok
> as long as the competition yields better software, but I am little
> concerned that is just going to cause more complexity with no gain.
>
> Also, the end users are confused. I have heard from people involved
> in NFV that want to use XDP. And users of server applications that
> want to use DPDK.
>
As davem said at netdev: "DPDK is not Linux". I don't see that it's
our problem to try to figure out how to make DPDK complementary
somehow or work together in harmony to resolve end user confusion. If
users want to use DPDK that's their prerogative,  but other that
providing a good reference for performance I don't see how this
impacts Linux nor the direction we need to take things. We've already
seen this same story play out with RDMA over the years...

Tom

>
>
>
>

^ permalink raw reply

* Re: [flamebait] xdp, well meaning but pointless
From: Stephen Hemminger @ 2016-12-02 19:56 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Tom Herbert, Jesper Dangaard Brouer, Thomas Graf,
	Florian Westphal, Linux Kernel Network Developers
In-Reply-To: <6501991a-355d-bf76-0f23-576cabbfe21f@stressinduktion.org>

On Fri, 2 Dec 2016 19:12:00 +0100
Hannes Frederic Sowa <hannes@stressinduktion.org> wrote:

> On 02.12.2016 17:59, Tom Herbert wrote:
> > On Fri, Dec 2, 2016 at 3:54 AM, Hannes Frederic Sowa
> > <hannes@stressinduktion.org> wrote:  
> >> On 02.12.2016 11:24, Jesper Dangaard Brouer wrote:  
> >>> On Thu, 1 Dec 2016 13:51:32 -0800
> >>> Tom Herbert <tom@herbertland.com> wrote:
> >>>  
> >>>>>> The technical plenary at last IETF on Seoul a couple of weeks ago was
> >>>>>> exclusively focussed on DDOS in light of the recent attack against
> >>>>>> Dyn. There were speakers form Cloudflare and Dyn. The Cloudflare
> >>>>>> presentation by Nick Sullivan
> >>>>>> (https://www.ietf.org/proceedings/97/slides/slides-97-ietf-sessb-how-to-stay-online-harsh-realities-of-operating-in-a-hostile-network-nick-sullivan-01.pdf)
> >>>>>> alluded to some implementation of DDOS mitigation. In particular, on
> >>>>>> slide 6 Nick gave some numbers for drop rates in DDOS. The "kernel"  
> >>>
> >>> slide 14
> >>>  
> >>>>>> numbers he gave we're based in iptables+BPF and that was a whole
> >>>>>> 1.2Mpps-- somehow that seems ridiculously to me (I said so at the mic
> >>>>>> and that's also when I introduced XDP to whole IETF :-) ). If that's
> >>>>>> the best we can do the Internet is in a world hurt. DDOS mitigation
> >>>>>> alone is probably a sufficient motivation to look at XDP. We need
> >>>>>> something that drops bad packets as quickly as possible when under
> >>>>>> attack, we need this to be integrated into the stack, we need it to be
> >>>>>> programmable to deal with the increasing savvy of attackers, and we
> >>>>>> don't want to be forced to be dependent on HW solutions. This is why
> >>>>>> we created XDP!  
> >>>
> >>> The 1.2Mpps number is a bit low, but we are unfortunately in that
> >>> ballpark.
> >>>  
> >>>>> I totally understand that. But in my reply to David in this thread I
> >>>>> mentioned DNS apex processing as being problematic which is actually
> >>>>> being referred in your linked slide deck on page 9 ("What do floods look
> >>>>> like") and the problematic of parsing DNS packets in XDP due to string
> >>>>> processing and looping inside eBPF.  
> >>>
> >>> That is a weak argument. You do realize CloudFlare actually use eBPF to
> >>> do this exact filtering, and (so-far) eBPF for parsing DNS have been
> >>> sufficient for them.  
> >>
> >> You are talking about this code on the following slides (I actually
> >> transcribed it for you here and disassembled):
> >>
> >> l0:     ld #0x14
> >> l1:     ldxb 4*([0]&0xf)
> >> l2:     add x
> >> l3:     tax
> >> l4:     ld [x+0]
> >> l5:     jeq #0x7657861, l6, l13
> >> l6:     ld [x+4]
> >> l7:     jeq #0x6d706c65, l8, l13
> >> l8:     ld [x+8]
> >> l9:     jeq #0x3636f6d, l10, l13
> >> l10:    ldb [x+12]
> >> l11:    jeq #0, l12, l13
> >> l12:    ret #0x1
> >> l13:    ret #0
> >>
> >> You can offload this to u32 in hardware if that is what you want.
> >>
> >> The reason this works is because of netfilter, which allows them to
> >> dynamically generate BPF programs and insert and delete them from
> >> chains, do intersection or unions of them.
> >>
> >> If you have a freestanding program like in XDP the complexity space is a
> >> different one and not comparable to this at all.
> >>  
> > I don't understand this comment about complexity especially in regards
> > to the idea of offloading u32 to hardware. Relying on hardware to do
> > anything always leads to more complexity than an equivalent SW
> > implementation for the same functionality. The only reason we ever use
> > a hardware mechanisms is if it gives *significantly* better
> > performance. If the performance difference isn't there then doing
> > things in SW is going to be the better path (as we see in XDP).  
> 
> I am just wondering why the u32 filter wasn't mentioned in their slide
> deck. If all what Cloudflare needs are those kind of matches, they are
> in fact actually easier to generate than an cBPF program. It is not a
> good example of how a real world DoS filter in XDP would look like.
> 
> If you argue XDP as a C function hook that can call arbitrary code in
> the driver before submitting that to the networking stack, yep, that is
> not complex at all. Depending on how those modules will be maintained,
> they either end up in the kernel and will be updated on major changes or
> are 3rd party and people have to update them and also depend on the
> driver features.
> 
> But this opens up a whole new can of worms also. I haven't really
> thought this through completely, but last time the patches were nack'ed
> with lots of strong opinions and I tended to agree with them. I am
> revisiting this position.
> 
> Certainly you can build real-world DoS protection with this function
> pointer hook and C code in the driver. In this case a user space
> solution still has advantages because of maintainability, as e.g. with
> netmap or dpdk you are again decoupled from the in-kernel API/ABI and
> don't need to test, recompile etc. on each kernel upgrade. If the module
> ends up in the kernel, those problems might also disappear.
> 
> For XDP+eBPF to provide a full DoS mitigation (protocol parsing,
> sampling and dropping) solution seems to be too complex for me because
> of the arguments I stated in my previous mail.

I take a "horses for courses" attitude.
 - XDP is better for providing high speed packet mangling. It is more
   programmable and faster than existing TC, iptables, nftables, infrastructure.

 - DPDK is better for implementing a networking infrastructure application.

To give two examples.  Implementing something as complex as FD.io/VPP
with XDP would be massive undertaking and not worth the effort. Likewise
reimplementing the full Linux networking stack with all the work on
congestion control, queue management and socket API's in DPDK would
be waste of effort. That is not to say that someone won't try it,
but it will create more bloat and bugs.

Unfortunately, both camps seem to have a high NIMBY quotient and
things are being developed for their own self interest. This is ok
as long as the competition yields better software, but I am little
concerned that is just going to cause more complexity with no gain.

Also, the end users are confused. I have heard from people involved
in NFV that want to use XDP. And users of server applications that
want to use DPDK.

^ permalink raw reply

* Re: bpf bounded loops. Was: [flamebait] xdp
From: Hannes Frederic Sowa @ 2016-12-02 19:50 UTC (permalink / raw)
  To: John Fastabend, Alexei Starovoitov
  Cc: Tom Herbert, Thomas Graf, Linux Kernel Network Developers,
	Daniel Borkmann, David S. Miller
In-Reply-To: <5841CE97.3050302@gmail.com>

On Fri, Dec 2, 2016, at 20:42, John Fastabend wrote:
> On 16-12-02 11:25 AM, Hannes Frederic Sowa wrote:
> > Hi,
> > 
> > On 02.12.2016 19:39, Alexei Starovoitov wrote:
> >> On Thu, Dec 01, 2016 at 10:27:12PM +0100, Hannes Frederic Sowa wrote:
> >>> like") and the problematic of parsing DNS packets in XDP due to string
> >>> processing and looping inside eBPF.
> >>
> >> Hannes,
> >> Not too long ago you proposed a very interesting idea to add
> >> support for bounded loops without adding any new bpf instructions and
> >> changing llvm (which was way better than my 'rep' like instructions
> >> I was experimenting with). I thought systemtap guys also wanted bounded
> >> loops and you were cooperating on the design, so I gave up on my work and
> >> was expecting an imminent patch from you. I guess it sounds like you know
> >> believe that bounded loops are impossible or I misunderstand your statement ?
> > 
> > Your argument was that it would need a new verifier as the current first
> > pass checks that we indeed can lay out the basic blocks as a DAG which
> > the second pass depends on. This would be violated.
> > 
> > Because eBPF is available by non privileged users this would need a lot
> > of effort to rewrite and verify (or indeed keep two verifiers in the
> > kernel for priv and non-priv). The verifier itself is exposed to
> > unprivileged users.
> 
> I missed this. Why the need for two verifiers?

Because of my fear that a more complex verifier will fail to provide the
same security guarantees than the old one, which already is relatively
complex.

> > Also, by design, if we keep the current limits, this would not give you
> > more instructions to operate on compared to the flattened version of the
> > program, it would merely reduce the numbers of optimizations in LLVM
> > that let the verifier reject the program.
> > 
> > Only enabling the relaxed verifier for root users seemed thus being
> > problematic as programs wouldn't be portable between nonprivileged and
> > privileged users.
> 
> Still a bit lost what does the relaxed verifier provide here?

It would allow a new instruction that is able to jump backwards. Ideally
it would be one verifier that allows this instruction and inserts the
counting logic in the BPF program.

> >> As far as pattern search for DNS packets...
> >> it was requested by Cloudflare guys back in March:
> >> https://github.com/iovisor/bcc/issues/471
> >> and it is useful for several tracing use cases as well.
> >> Unfortunately no one had time to implement it yet.
> > 
> > The string operations you proposed on the other hand, which would count
> > as one eBPF instructions, would give a lot more flexibility and allow
> > more cycles to burn, but don't help parsing binary protocols like IPv6
> > extension headers.
> 
> My rough thinking on this was the verifier had to start looking for loop
> invariants and to guarantee termination. Sounds scary in general but
> LLVM could put these in some normal form for us and the verifier could
> only accept decreasing loops, the invariants could be required to be
> integers, etc. By simplifying the loop enough the problem becomes
> tractable.

Which wouldn't buy more than LLVM simply unrolling everything, no?
Otherwise a lot of optimizations passes need to be touched. Alexei, what
do you think of this idea?

> I think this would be better than new instructions and/or multiple
> verifiers.

Bye,
Hannes

^ permalink raw reply

* Re: bpf bounded loops. Was: [flamebait] xdp
From: Hannes Frederic Sowa @ 2016-12-02 19:42 UTC (permalink / raw)
  To: Hannes Frederic Sowa, Alexei Starovoitov
  Cc: Tom Herbert, Thomas Graf, Linux Kernel Network Developers,
	Daniel Borkmann, David S. Miller
In-Reply-To: <2f3ce25c-3f59-9120-7d09-619be9b58e7a@stressinduktion.org>

On Fri, Dec 2, 2016, at 20:25, Hannes Frederic Sowa wrote:
> On 02.12.2016 19:39, Alexei Starovoitov wrote:
> > On Thu, Dec 01, 2016 at 10:27:12PM +0100, Hannes Frederic Sowa wrote:
> >> like") and the problematic of parsing DNS packets in XDP due to string
> >> processing and looping inside eBPF.
> > 
> > Hannes,
> > Not too long ago you proposed a very interesting idea to add
> > support for bounded loops without adding any new bpf instructions and
> > changing llvm (which was way better than my 'rep' like instructions
> > I was experimenting with). I thought systemtap guys also wanted bounded
> > loops and you were cooperating on the design, so I gave up on my work and
> > was expecting an imminent patch from you. I guess it sounds like you know
> > believe that bounded loops are impossible or I misunderstand your statement ?
> 
> Your argument was that it would need a new verifier as the current first
> pass checks that we indeed can lay out the basic blocks as a DAG which
> the second pass depends on. This would be violated.
> 
> Because eBPF is available by non privileged users this would need a lot
> of effort to rewrite and verify (or indeed keep two verifiers in the
> kernel for priv and non-priv). The verifier itself is exposed to
> unprivileged users.
> 
> Also, by design, if we keep the current limits, this would not give you
> more instructions to operate on compared to the flattened version of the
> program, it would merely reduce the numbers of optimizations in LLVM
> that let the verifier reject the program.
> 
> Only enabling the relaxed verifier for root users seemed thus being
> problematic as programs wouldn't be portable between nonprivileged and
> privileged users.

Quick addendum:

The only solution to protect the verifier, which I saw, would be to
limit it by time and space, thus making loading of eBPF programs
depending on how fast and hot (thermal throttling) one CPU thread is.

Those are the complexity problems I am talking and concerned about.

Bye,
Hannes

^ permalink raw reply

* Re: bpf bounded loops. Was: [flamebait] xdp
From: John Fastabend @ 2016-12-02 19:42 UTC (permalink / raw)
  To: Hannes Frederic Sowa, Alexei Starovoitov
  Cc: Tom Herbert, Thomas Graf, Linux Kernel Network Developers,
	Daniel Borkmann, David S. Miller
In-Reply-To: <2f3ce25c-3f59-9120-7d09-619be9b58e7a@stressinduktion.org>

On 16-12-02 11:25 AM, Hannes Frederic Sowa wrote:
> Hi,
> 
> On 02.12.2016 19:39, Alexei Starovoitov wrote:
>> On Thu, Dec 01, 2016 at 10:27:12PM +0100, Hannes Frederic Sowa wrote:
>>> like") and the problematic of parsing DNS packets in XDP due to string
>>> processing and looping inside eBPF.
>>
>> Hannes,
>> Not too long ago you proposed a very interesting idea to add
>> support for bounded loops without adding any new bpf instructions and
>> changing llvm (which was way better than my 'rep' like instructions
>> I was experimenting with). I thought systemtap guys also wanted bounded
>> loops and you were cooperating on the design, so I gave up on my work and
>> was expecting an imminent patch from you. I guess it sounds like you know
>> believe that bounded loops are impossible or I misunderstand your statement ?
> 
> Your argument was that it would need a new verifier as the current first
> pass checks that we indeed can lay out the basic blocks as a DAG which
> the second pass depends on. This would be violated.
> 
> Because eBPF is available by non privileged users this would need a lot
> of effort to rewrite and verify (or indeed keep two verifiers in the
> kernel for priv and non-priv). The verifier itself is exposed to
> unprivileged users.

I missed this. Why the need for two verifiers?

> 
> Also, by design, if we keep the current limits, this would not give you
> more instructions to operate on compared to the flattened version of the
> program, it would merely reduce the numbers of optimizations in LLVM
> that let the verifier reject the program.
> 
> Only enabling the relaxed verifier for root users seemed thus being
> problematic as programs wouldn't be portable between nonprivileged and
> privileged users.

Still a bit lost what does the relaxed verifier provide here?

> 
>> As far as pattern search for DNS packets...
>> it was requested by Cloudflare guys back in March:
>> https://github.com/iovisor/bcc/issues/471
>> and it is useful for several tracing use cases as well.
>> Unfortunately no one had time to implement it yet.
> 
> The string operations you proposed on the other hand, which would count
> as one eBPF instructions, would give a lot more flexibility and allow
> more cycles to burn, but don't help parsing binary protocols like IPv6
> extension headers.

My rough thinking on this was the verifier had to start looking for loop
invariants and to guarantee termination. Sounds scary in general but
LLVM could put these in some normal form for us and the verifier could
only accept decreasing loops, the invariants could be required to be
integers, etc. By simplifying the loop enough the problem becomes
tractable.

I think this would be better than new instructions and/or multiple
verifiers.

.John

> 
> Bye,
> Hannes
> 

^ permalink raw reply

* Re: [PATCHv2 net-next 3/4] net: dsa: mv88e6xxx: Move the tagging protocol into info
From: Vivien Didelot @ 2016-12-02 19:41 UTC (permalink / raw)
  To: Andrew Lunn, David Miller; +Cc: netdev, Andrew Lunn
In-Reply-To: <1480701779-30633-4-git-send-email-andrew@lunn.ch>

Hi Andrew,

Andrew Lunn <andrew@lunn.ch> writes:

> @@ -3749,6 +3756,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = {
>  		.global1_addr = 0x1b,
>  		.age_time_coeff = 15000,
>  		.g1_irqs = 9,
> +		.tag_protocol = DSA_TAG_PROTO_EDSA,
>  		.flags = MV88E6XXX_FLAGS_FAMILY_6352,
>  		.ops = &mv88e6172_ops,
>  	},

Since some chips support several protocols, we will have to turn
tag_protocol into a bitmask and introduce something like:

    enum mv88e6xxx_tag {
        MV88E6XXX_TAG_TRAILER = BIT(DSA_TAG_PROTO_TRAILER),
        MV88E6XXX_TAG_DSA = BIT(DSA_TAG_PROTO_DSA),
        MV88E6XXX_TAG_EDSA = BIT(DSA_TAG_PROTO_EDSA),
    };

But I guess it is OK to force a single one at the moment.

Reviewed-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>

Thanks,

        Vivien

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox