netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC][PATCH 0/1] net: support for hardware timestamps
@ 2008-07-29  0:07 Octavian Purdila
  2008-07-29  0:08 ` [RFC][PATCH 1/1] net: support for hardware timestamping Octavian Purdila
  0 siblings, 1 reply; 14+ messages in thread
From: Octavian Purdila @ 2008-07-29  0:07 UTC (permalink / raw)
  To: netdev

Hi everybody,

This is another attempt at hardware timestamps, based on idea
suggested by David Miller and Stephen Hemminger in the previous thread
([RFC][PATCH 0/3] net: per skb control messages).

Thanks,
tavi



^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29  0:07 [RFC][PATCH 0/1] net: support for hardware timestamps Octavian Purdila
@ 2008-07-29  0:08 ` Octavian Purdila
  2008-07-29 14:52   ` Patrick Ohly
  0 siblings, 1 reply; 14+ messages in thread
From: Octavian Purdila @ 2008-07-29  0:08 UTC (permalink / raw)
  To: netdev; +Cc: Octavian Purdila

This patch adds the infrastructure which allows net devices to store
hardware timestamps in the skb tstamp field so that they can
eventually be passed to userspace.

A bit in skb->tstamp is used to mark the timestamp as hardware
timestamp and a new net_device method, get_tstamp, is added to do the
conversion from the hardware specific timestamp to a "gettimeofday
compatible" timestamp.

A new net device ioctl is added (SIOCHWTSQUERY) to allow the
applications to retrieve information about the timestamping unit
(timer frequency and bits).

New socket option and socket control message are added as well
(SO_TIMESTAMPHW and SCM_TIMESTAMPHW).

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
---
 include/asm-powerpc/socket.h            |    3 +
 include/linux/if.h                      |    5 ++
 include/linux/netdevice.h               |    2 +
 include/linux/skbuff.h                  |   17 +-----
 include/linux/sockios.h                 |    3 +
 include/net/sock.h                      |   11 ++++-
 include/net/tstamps.h                   |   86 +++++++++++++++++++++++++++++++
 net/core/sock.c                         |   29 +++++++---
 net/decnet/dn_route.c                   |    2 +-
 net/econet/af_econet.c                  |    2 +-
 net/ipv4/ip_fragment.c                  |    4 +-
 net/ipv4/netfilter/ip_queue.c           |    2 +-
 net/ipv4/netfilter/ipt_ULOG.c           |    2 +-
 net/ipv4/tcp_input.c                    |    2 +-
 net/ipv4/tcp_ipv4.c                     |    2 +-
 net/ipv4/tcp_output.c                   |    2 +-
 net/ipv6/netfilter/ip6_queue.c          |    2 +-
 net/ipv6/netfilter/nf_conntrack_reasm.c |    4 +-
 net/ipv6/reassembly.c                   |    4 +-
 net/ipv6/tcp_ipv6.c                     |    2 +-
 net/ipx/af_ipx.c                        |    2 +-
 net/netfilter/nfnetlink_log.c           |    2 +-
 net/netfilter/nfnetlink_queue.c         |    2 +-
 net/netfilter/xt_time.c                 |    2 +-
 net/packet/af_packet.c                  |    4 +-
 net/rxrpc/ar-input.c                    |    2 +-
 net/socket.c                            |   21 +++++---
 net/sunrpc/svcsock.c                    |    2 +-
 28 files changed, 168 insertions(+), 55 deletions(-)
 create mode 100644 include/net/tstamps.h

diff --git a/include/asm-powerpc/socket.h b/include/asm-powerpc/socket.h
index f5a4e16..be304b6 100644
--- a/include/asm-powerpc/socket.h
+++ b/include/asm-powerpc/socket.h
@@ -61,4 +61,7 @@
 
 #define SO_MARK			36
 
+#define SO_TIMESTAMPHW		37
+#define SCM_TIMESTAMPHW		SO_TIMESTAMPHW
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/include/linux/if.h b/include/linux/if.h
index 5c9d1fa..8264371 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -148,6 +148,11 @@ struct if_settings
 	} ifs_ifsu;
 };
 
+struct if_hwtstamps {
+	unsigned short freq;		/* timer frequency, in ns */
+	unsigned char bits;		/* how much can we count, in bits */
+};
+
 /*
  * Interface request structure used for socket
  * ioctl's.  All interface ioctl's must have parameter
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 25f8710..7ba9c1a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -727,6 +727,8 @@ struct net_device
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*poll_controller)(struct net_device *dev);
 #endif
+	ktime_t			(*get_tstamp)(struct net_device *dev,
+					      const struct sk_buff *skb);
 
 #ifdef CONFIG_NET_NS
 	/* Network namespace this network device is inside */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 299ec4b..f19ed43 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -199,7 +199,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@next: Next buffer in list
  *	@prev: Previous buffer in list
  *	@sk: Socket we are owned by
- *	@tstamp: Time we arrived
+ *	@tstamp: Time we arrived; representation might be hardware specific, do
+ *	        not access directly but via skb_get_tstamp
  *	@dev: Device we arrived on/are leaving by
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
@@ -1515,20 +1516,6 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,
 
 extern void skb_init(void);
 
-/**
- *	skb_get_timestamp - get timestamp from a skb
- *	@skb: skb to get stamp from
- *	@stamp: pointer to struct timeval to store stamp in
- *
- *	Timestamps are stored in the skb as offsets to a base timestamp.
- *	This function converts the offset back to a struct timeval and stores
- *	it in stamp.
- */
-static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
-{
-	*stamp = ktime_to_timeval(skb->tstamp);
-}
-
 static inline void __net_timestamp(struct sk_buff *skb)
 {
 	skb->tstamp = ktime_get_real();
diff --git a/include/linux/sockios.h b/include/linux/sockios.h
index abef759..c7bd550 100644
--- a/include/linux/sockios.h
+++ b/include/linux/sockios.h
@@ -122,6 +122,9 @@
 #define SIOCBRADDIF	0x89a2		/* add interface to bridge      */
 #define SIOCBRDELIF	0x89a3		/* remove interface from bridge */
 
+/* hardware timestamps */
+#define SIOCHWTSQUERY	0x89b0		/* timer freq, bits, etc. */
+
 /* Device private ioctl calls */
 
 /*
diff --git a/include/net/sock.h b/include/net/sock.h
index dc42b44..7ff1d02 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -56,6 +56,7 @@
 #include <asm/atomic.h>
 #include <net/dst.h>
 #include <net/checksum.h>
+#include <net/tstamps.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -413,6 +414,7 @@ enum sock_flags {
 	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */
 	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
 	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
+	SOCK_RCVTSTAMPHW, /* %SO_TIMESTAMPHW setting */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1253,7 +1255,7 @@ extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
-	ktime_t kt = skb->tstamp;
+	ktime_t kt = skb_get_tstamp(skb);
 
 	if (sock_flag(sk, SOCK_RCVTSTAMP))
 		__sock_recv_timestamp(msg, sk, skb);
@@ -1321,6 +1323,13 @@ extern void sock_enable_timestamp(struct sock *sk);
 extern int sock_get_timestamp(struct sock *, struct timeval __user *);
 extern int sock_get_timestampns(struct sock *, struct timespec __user *);
 
+static inline void skb_drop_dev(struct sock *sk, struct sk_buff *skb)
+{
+	if (!sk || !sock_flag(sk, SOCK_RCVTSTAMPHW))
+		skb->tstamp = skb_get_tstamp(skb);
+	skb->dev = NULL;
+}
+
 /* 
  *	Enable debug/info messages 
  */
diff --git a/include/net/tstamps.h b/include/net/tstamps.h
new file mode 100644
index 0000000..9e02673
--- /dev/null
+++ b/include/net/tstamps.h
@@ -0,0 +1,86 @@
+#ifndef _NET_TIMESTAMPS_H
+#define _NET_TIMESTAMPS_H
+
+#ifdef __KERNEL__
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+
+/*
+ * If the driver has timestamping capabilities, it will store its private
+ * formated timestamp in skb->tstamp and will set the high bit of the timestamp
+ * to announce this.
+ *
+ * The hardware timestamp can then reach userspace as it is (via
+ * SO_TIMESTAMPHW).
+ *
+ * When needed for in kernel or in userspace usage, it will be converted to a
+ * plain ktime_t timestamp via the driver's get_stamp method.
+ */
+#define SKB_TSTAMP_HW (0x8000000000000000ULL)
+
+static inline int __skb_has_hwtstamp(const struct sk_buff *skb)
+{
+	int hwstamp = (skb->tstamp.tv64 & SKB_TSTAMP_HW)?1:0;
+
+	BUG_ON(hwstamp && (!skb->dev || !skb->dev->get_tstamp));
+
+	return hwstamp;
+}
+
+static inline ktime_t mk_hwtstamp(__u64 value)
+{
+	ktime_t tstamp = {
+		.tv64 = value | SKB_TSTAMP_HW
+	};
+
+	return tstamp;
+}
+
+static inline ktime_t skb_get_tstamp(const struct sk_buff *skb)
+{
+	if (__skb_has_hwtstamp(skb))
+		return skb->dev->get_tstamp(skb->dev, skb);
+	else
+		return skb->tstamp;
+}
+
+/**
+ *	skb_get_timestamp - get timestamp (ms resolution) from a skb
+ *	@skb: skb to get stamp from
+ *	@stamp: pointer to struct timeval to store stamp in
+ */
+static inline void skb_get_timestamp(const struct sk_buff *skb,
+				     struct timeval *stamp)
+{
+	*stamp = ktime_to_timeval(skb_get_tstamp(skb));
+}
+
+/**
+ *	skb_get_timestamp - get timestamp (ns resolution) from a skb
+ *	@skb: skb to get stamp from
+ *	@stamp: pointer to struct timespec to store stamp in
+ */
+static inline void skb_get_timestamp_ns(const struct sk_buff *skb,
+					struct timespec *stamp)
+{
+	*stamp = ktime_to_timespec(skb_get_tstamp(skb));
+}
+
+/**
+ *	skb_get_hwtstamp - get the hardware timestamp from a skb
+ *	@skb: skb to get stamp from
+ *	@stamp: pointer to ktime_t where to store stamp in
+ */
+static inline int skb_get_timestamp_hw(const struct sk_buff *skb,
+				       ktime_t *stamp)
+{
+	if (!__skb_has_hwtstamp(skb))
+		return -EINVAL;
+
+	stamp->tv64 = skb->tstamp.tv64 & ~SKB_TSTAMP_HW;
+	return 0;
+}
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/net/core/sock.c b/net/core/sock.c
index 88094cb..d6f05bb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -288,7 +288,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 		goto out;
 	}
 
-	skb->dev = NULL;
+	skb_drop_dev(sk, skb);
 	skb_set_owner_r(skb, sk);
 
 	/* Cache the SKB length before we tack it onto the receive
@@ -314,7 +314,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
-	skb->dev = NULL;
+	skb_drop_dev(sk, skb);
 
 	if (nested)
 		bh_lock_sock_nested(sk);
@@ -609,17 +609,23 @@ set_rcvbuf:
 
 	case SO_TIMESTAMP:
 	case SO_TIMESTAMPNS:
+	case SO_TIMESTAMPHW:
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPHW);
 		if (valbool)  {
-			if (optname == SO_TIMESTAMP)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
+			switch (optname) {
+			case SO_TIMESTAMPNS:
 				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
+				break;
+			case SO_TIMESTAMPHW:
+				sock_set_flag(sk, SOCK_RCVTSTAMPHW);
+				break;
+			}
 			sock_set_flag(sk, SOCK_RCVTSTAMP);
 			sock_enable_timestamp(sk);
-		} else {
+		} else
 			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-		}
+
 		break;
 
 	case SO_RCVLOWAT:
@@ -760,13 +766,18 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 
 	case SO_TIMESTAMP:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
-				!sock_flag(sk, SOCK_RCVTSTAMPNS);
+			!sock_flag(sk, SOCK_RCVTSTAMPNS) &&
+			!sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
 	case SO_TIMESTAMPNS:
 		v.val = sock_flag(sk, SOCK_RCVTSTAMPNS);
 		break;
 
+	case SO_TIMESTAMPHW:
+		v.val = sock_flag(sk, SOCK_RCVTSTAMPHW);
+		break;
+
 	case SO_RCVTIMEO:
 		lv=sizeof(struct timeval);
 		if (sk->sk_rcvtimeo == MAX_SCHEDULE_TIMEOUT) {
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index f50e88b..e718f90 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1572,7 +1572,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void
 
 	if (skb->dev)
 		dev_put(skb->dev);
-	skb->dev = NULL;
+	skb_drop_dev(init_net.rtnl, skb);
 	if (err)
 		goto out_free;
 	skb->dst = &rt->u.dst;
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 7c9bb13..430b0d7 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
 	if (err)
 		goto out_free;
-	sk->sk_stamp = skb->tstamp;
+	sk->sk_stamp = skb_get_tstamp(skb);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 37221f6..b2ccc9c 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -434,9 +434,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	dev = skb->dev;
 	if (dev) {
 		qp->iif = dev->ifindex;
-		skb->dev = NULL;
+		skb_drop_dev(NULL, skb);
 	}
-	qp->q.stamp = skb->tstamp;
+	qp->q.stamp = skb_get_tstamp(skb);
 	qp->q.meat += skb->len;
 	atomic_add(skb->truesize, &qp->q.net->mem);
 	if (offset == 0)
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 26a37ce..86039d2 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -193,7 +193,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	tv = ktime_to_timeval(entry->skb->tstamp);
+	tv = ktime_to_timeval(skb_get_tstamp(entry->skb));
 	pmsg->timestamp_sec   = tv.tv_sec;
 	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b192756..48c95b0 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -214,7 +214,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
 
 	/* copy hook, prefix, timestamp, payload, etc. */
 	pm->data_len = copy_len;
-	tv = ktime_to_timeval(skb->tstamp);
+	tv = ktime_to_timeval(skb_get_tstamp(skb));
 	put_unaligned(tv.tv_sec, &pm->timestamp_sec);
 	put_unaligned(tv.tv_usec, &pm->timestamp_usec);
 	put_unaligned(skb->mark, &pm->mark);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index cad73b7..be18bf1 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2872,7 +2872,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
 				flag |= FLAG_NONHEAD_RETRANS_ACKED;
 		} else {
 			ca_seq_rtt = now - scb->when;
-			last_ackt = skb->tstamp;
+			last_ackt = skb_get_tstamp(skb);
 			if (seq_rtt < 0) {
 				seq_rtt = ca_seq_rtt;
 			}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ffe869a..e709adb 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1654,7 +1654,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
-	skb->dev = NULL;
+	skb_drop_dev(sk, skb);
 
 	bh_lock_sock_nested(sk);
 	ret = 0;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ad993ec..8d0f36d 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -753,7 +753,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
 	 * skbs, which it never sent before. --ANK
 	 */
 	TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
-	buff->tstamp = skb->tstamp;
+	buff->tstamp = skb_get_tstamp(skb);
 
 	old_factor = tcp_skb_pcount(skb);
 
diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c
index 2eff3ae..a2db96e 100644
--- a/net/ipv6/netfilter/ip6_queue.c
+++ b/net/ipv6/netfilter/ip6_queue.c
@@ -196,7 +196,7 @@ ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
 
 	pmsg->packet_id       = (unsigned long )entry;
 	pmsg->data_len        = data_len;
-	tv = ktime_to_timeval(entry->skb->tstamp);
+	tv = ktime_to_timeval(skb_get_tstamp(entry->skb));
 	pmsg->timestamp_sec   = tv.tv_sec;
 	pmsg->timestamp_usec  = tv.tv_usec;
 	pmsg->mark            = entry->skb->mark;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index cf20bc4..deabff0 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -378,8 +378,8 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
 	else
 		fq->q.fragments = skb;
 
-	skb->dev = NULL;
-	fq->q.stamp = skb->tstamp;
+	skb_drop_dev(sk, skb);
+	fq->q.stamp = skb_get_tstamp(skb);
 	fq->q.meat += skb->len;
 	atomic_add(skb->truesize, &nf_init_frags.mem);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index a60d7d1..8ea4f65 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -410,9 +410,9 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
 	dev = skb->dev;
 	if (dev) {
 		fq->iif = dev->ifindex;
-		skb->dev = NULL;
+		skb_drop_dev(NULL, skb);
 	}
-	fq->q.stamp = skb->tstamp;
+	fq->q.stamp = skb_get_tstamp(skb);
 	fq->q.meat += skb->len;
 	atomic_add(skb->truesize, &fq->q.net->mem);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 40ea9c3..a0b2927 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1756,7 +1756,7 @@ process:
 	if (sk_filter(sk, skb))
 		goto discard_and_relse;
 
-	skb->dev = NULL;
+	skb_drop_dev(sk, skb);
 
 	bh_lock_sock_nested(sk);
 	ret = 0;
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 81ae873..d8393ac 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1805,7 +1805,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (rc)
 		goto out_free;
 	if (skb->tstamp.tv64)
-		sk->sk_stamp = skb->tstamp;
+		sk->sk_stamp = skb_get_tstamp(skb);
 
 	msg->msg_namelen = sizeof(*sipx);
 
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index b8173af..3ba837d 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -455,7 +455,7 @@ __build_packet_message(struct nfulnl_instance *inst,
 
 	if (skb->tstamp.tv64) {
 		struct nfulnl_msg_packet_timestamp ts;
-		struct timeval tv = ktime_to_timeval(skb->tstamp);
+		struct timeval tv = ktime_to_timeval(skb_get_tstamp(skb));
 		ts.sec = cpu_to_be64(tv.tv_sec);
 		ts.usec = cpu_to_be64(tv.tv_usec);
 
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 3447025..6069e07 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -351,7 +351,7 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
 
 	if (entskb->tstamp.tv64) {
 		struct nfqnl_msg_packet_timestamp ts;
-		struct timeval tv = ktime_to_timeval(entskb->tstamp);
+		struct timeval tv = ktime_to_timeval(skb_get_tstamp(entskb));
 		ts.sec = cpu_to_be64(tv.tv_sec);
 		ts.usec = cpu_to_be64(tv.tv_usec);
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index ed76baa..3737d6e 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -172,7 +172,7 @@ time_mt(const struct sk_buff *skb, const struct net_device *in,
 	if (skb->tstamp.tv64 == 0)
 		__net_timestamp((struct sk_buff *)skb);
 
-	stamp = ktime_to_ns(skb->tstamp);
+	stamp = ktime_to_ns(skb_get_tstamp(skb));
 	do_div(stamp, NSEC_PER_SEC);
 
 	if (info->flags & XT_TIME_LOCAL_TZ)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2cee87d..c895e72 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -518,7 +518,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 		goto drop_n_acct;
 
 	skb_set_owner_r(skb, sk);
-	skb->dev = NULL;
+	skb_drop_dev(sk, skb);
 	dst_release(skb->dst);
 	skb->dst = NULL;
 
@@ -639,7 +639,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 	h->tp_mac = macoff;
 	h->tp_net = netoff;
 	if (skb->tstamp.tv64)
-		tv = ktime_to_timeval(skb->tstamp);
+		tv = ktime_to_timeval(skb_get_tstamp(skb));
 	else
 		do_gettimeofday(&tv);
 	h->tp_sec = tv.tv_sec;
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index f8a699e..2229edc 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -87,7 +87,7 @@ int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
 	    !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
 	    call->socket->sk.sk_state != RXRPC_CLOSE) {
 		skb->destructor = rxrpc_packet_destructor;
-		skb->dev = NULL;
+		skb_drop_dev(skb);
 		skb->sk = sk;
 		atomic_add(skb->truesize, &sk->sk_rmem_alloc);
 
diff --git a/net/socket.c b/net/socket.c
index 66c4a8c..40bc098 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -601,24 +601,31 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 {
 	ktime_t kt = skb->tstamp;
 
-	if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-		struct timeval tv;
+	// FIXME: I've tried to keep the same no jump path for the common
+	// SOCK_RCVTSTAMP as in orignal code, with the unlikely constructs;
+	// is it necessary?
+	if (unlikely(sock_flag(sk, SOCK_RCVTSTAMPHW))) {
+		if (!skb_get_timestamp_hw(skb, &kt))
+		    put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPHW, sizeof(kt), &kt);
+	} else if (unlikely(sock_flag(sk, SOCK_RCVTSTAMPNS))) {
+		struct timespec ts;
 		/* Race occurred between timestamp enabling and packet
 		   receiving.  Fill in the current time for now. */
 		if (kt.tv64 == 0)
 			kt = ktime_get_real();
 		skb->tstamp = kt;
-		tv = ktime_to_timeval(kt);
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
+		skb_get_timestamp_ns(skb, &ts);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+
 	} else {
-		struct timespec ts;
+		struct timeval tv;
 		/* Race occurred between timestamp enabling and packet
 		   receiving.  Fill in the current time for now. */
 		if (kt.tv64 == 0)
 			kt = ktime_get_real();
 		skb->tstamp = kt;
-		ts = ktime_to_timespec(kt);
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+		skb_get_timestamp(skb, &tv);
+		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
 	}
 }
 
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 3e65719..0f2226a 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -477,7 +477,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
 		/* Don't enable netstamp, sunrpc doesn't
 		   need that much accuracy */
 	}
-	svsk->sk_sk->sk_stamp = skb->tstamp;
+	svsk->sk_sk->sk_stamp = skb_get_tstamp(skb);
 	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */
 
 	/*
-- 
1.5.6.2


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29  0:08 ` [RFC][PATCH 1/1] net: support for hardware timestamping Octavian Purdila
@ 2008-07-29 14:52   ` Patrick Ohly
  2008-07-29 15:49     ` Octavian Purdila
  2008-07-29 15:54     ` Stephen Hemminger
  0 siblings, 2 replies; 14+ messages in thread
From: Patrick Ohly @ 2008-07-29 14:52 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: netdev

On Tue, 2008-07-29 at 03:08 +0300, Octavian Purdila wrote:
> New socket option and socket control message are added as well
> (SO_TIMESTAMPHW and SCM_TIMESTAMPHW).

How is a network driver notified that it is expected to do hardware time
stamping? The connection between the socket option and the driver isn't
quite clear to me (which might very well be due to my lack of experience
in this area - please bear with me...). Is the driver expected to check
the socket flags whenever it gets a chance?

IMHO it would be necessary to attach this configuration change not just
to a socket, but also to a message which is then routed to the right
device driver.

A simple on/off flag is not sufficient, either: for example, the Intel
82576 chip only has one RX register that is locked until read by the
driver. When time stamping all incoming packets, relevant time stamps
may get lost under high load. The hardware can be configured to only
time stamp packets of interest, which helps considerably. Here are some
generally useful modes:
      * PTP v1/2 Sync via UDP
      * PTP v1/2 DelayRequest via UDP
      * PTP v2 Sync via L2 (802.AS)
      * PTP v2 DelayRequest via L2 (802.AS)
      * all packets

The docs can be found here, in case anyone is interested:
http://sourceforge.net/project/showfiles.php?group_id=42302

It would be nice if the user space PTP could choose which packets are
time stamped (depending whether it is master or slave it needs either
Sync or DelayRequest, and it may use UDP or Ethernet). It may also be
important to know for the application whether the hardware is really
capable of delivering what it is asked for.

> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 299ec4b..f19ed43 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -199,7 +199,8 @@ typedef unsigned char *sk_buff_data_t;
>   *	@next: Next buffer in list
>   *	@prev: Previous buffer in list
>   *	@sk: Socket we are owned by
> - *	@tstamp: Time we arrived
> + *	@tstamp: Time we arrived; representation might be hardware specific, do
> + *	        not access directly but via skb_get_tstamp

Given that the semantic of the "tstamp" member has changed and any
existing code which still accesses it directly is broken, should the
member perhaps be renamed to trigger compiler errors in such a case?
Just a thought.

-- 
Best Regards, Patrick Ohly

The content of this message is my personal opinion only and although
I am an employee of Intel, the statements I make here in no way
represent Intel's position on the issue, nor am I authorized to speak
on behalf of Intel on this matter.

The email footer below is automatically added to comply with company
policy; this particular email is not confidental and does not have a
limited set of recipients. Therefore it can be redistributed and
discussed without restrictions.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 14:52   ` Patrick Ohly
@ 2008-07-29 15:49     ` Octavian Purdila
  2008-07-30  9:35       ` Patrick Ohly
  2008-07-29 15:54     ` Stephen Hemminger
  1 sibling, 1 reply; 14+ messages in thread
From: Octavian Purdila @ 2008-07-29 15:49 UTC (permalink / raw)
  To: Patrick Ohly; +Cc: netdev


Hi Patrick,

Thanks for taking the time to go over this.

> > New socket option and socket control message are added as well
> > (SO_TIMESTAMPHW and SCM_TIMESTAMPHW).
>
> How is a network driver notified that it is expected to do hardware time
> stamping? The connection between the socket option and the driver isn't
> quite clear to me (which might very well be due to my lack of experience
> in this area - please bear with me...). 

I don't have much experience either so this might not be even close to a good 
solution :)

> Is the driver expected to check 
> the socket flags whenever it gets a chance?
>

If/when the driver chooses, it will start using hardware timestamps and the 
hardware timestamp will always (or when possible) be stored in skb->tstamp. 
Then, when copying the data in userspace, we will look at the socket flag and 
if the socket has requested a hw timestamp we will directly copy the 
skb->tstamp, otherwise if a regular timestamp has been requested, we will 
convert it to a regular timestamp via the new added get_tstamp driver method.

I don't know if it is required to add an on/off switch for hardware 
timestamping with this architecture, but if it is, we have some choices about 
how to do this:
- via ethtool
- via new SIOCSHWTSTAMP{ON/OFF} ioctls
- via module parameters

> IMHO it would be necessary to attach this configuration change not just
> to a socket, but also to a message which is then routed to the right
> device driver.
>
> A simple on/off flag is not sufficient, either: for example, the Intel
> 82576 chip only has one RX register that is locked until read by the
> driver. When time stamping all incoming packets, relevant time stamps
> may get lost under high load. The hardware can be configured to only
> time stamp packets of interest, which helps considerably. 

Ok, I see... How about adding a new SIOCSHWTSTAMPFILTER ioctl:

#define HWTSTAMP_FILTER_PTP_L2 0x01
#define HWTSTAMP_FILTER_PTP_L4 0x02
...
struct hwtstamp_filter {
	char type;
};

If needed we could later expand hwtstamp_filter to include ether_types, 
ip_types, udp/tcp ports, etc.

> It may also be 
> important to know for the application whether the hardware is really
> capable of delivering what it is asked for.
>

I am not sure if I understood this, but the ioctl return code should do it, 
right?

> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 299ec4b..f19ed43 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -199,7 +199,8 @@ typedef unsigned char *sk_buff_data_t;
> >   *	@next: Next buffer in list
> >   *	@prev: Previous buffer in list
> >   *	@sk: Socket we are owned by
> > - *	@tstamp: Time we arrived
> > + *	@tstamp: Time we arrived; representation might be hardware specific,
> > do + *	        not access directly but via skb_get_tstamp
>
> Given that the semantic of the "tstamp" member has changed and any
> existing code which still accesses it directly is broken, should the
> member perhaps be renamed to trigger compiler errors in such a case?
> Just a thought.

I am ok with that, but I don't know if this is an acceptable practice :)

BTW, the TX timestamps patch I've sent yesterday is also very closely related 
to PTP, and since you have experience with PTP I am wondering how the 
proposed interface looks to you.

Thanks,
tavi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 14:52   ` Patrick Ohly
  2008-07-29 15:49     ` Octavian Purdila
@ 2008-07-29 15:54     ` Stephen Hemminger
  2008-07-29 16:11       ` Octavian Purdila
  1 sibling, 1 reply; 14+ messages in thread
From: Stephen Hemminger @ 2008-07-29 15:54 UTC (permalink / raw)
  To: Patrick Ohly; +Cc: Octavian Purdila, netdev

On Tue, 29 Jul 2008 16:52:43 +0200
Patrick Ohly <patrick.ohly@intel.com> wrote:

> On Tue, 2008-07-29 at 03:08 +0300, Octavian Purdila wrote:
> > New socket option and socket control message are added as well
> > (SO_TIMESTAMPHW and SCM_TIMESTAMPHW).
> 
> How is a network driver notified that it is expected to do hardware time
> stamping? The connection between the socket option and the driver isn't
> quite clear to me (which might very well be due to my lack of experience
> in this area - please bear with me...). Is the driver expected to check
> the socket flags whenever it gets a chance?
> 
> IMHO it would be necessary to attach this configuration change not just
> to a socket, but also to a message which is then routed to the right
> device driver.
>
In my sky2 sample code, I took a different approach:
 1. Why have HW timestamps different than existing timestamps? If you
    just use existing timestamp, no socket API is needed.
 2. Driver can periodically check if socket timestamping is enabled,
    (atomic_read(&netstamp_needed)) and enable hardware stamping then.
    Alternatively, add a new notifier to net_enable_timestamp() and
    net_disable_timestamp().


   

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 15:54     ` Stephen Hemminger
@ 2008-07-29 16:11       ` Octavian Purdila
  2008-07-29 17:30         ` Ingo Oeser
  2008-07-30  8:51         ` Patrick Ohly
  0 siblings, 2 replies; 14+ messages in thread
From: Octavian Purdila @ 2008-07-29 16:11 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Patrick Ohly, netdev

On Tuesday 29 July 2008, Stephen Hemminger wrote:
>
> In my sky2 sample code, I took a different approach:
>  1. Why have HW timestamps different than existing timestamps? If you
>     just use existing timestamp, no socket API is needed.

I agree that is a much better approach if you are ok with the variance 
introduced by synchronizing the HW timestamps with the CPU clock.

But if you want to precisely measure one-way delays, and if you have the hw 
timestamp units synchronized across the nodes, then you need the hardware 
timesetamp. 

Or maybe I am stuck on this idea because of doing things this way for a long 
time and there is a better solution?

Thanks,
tavi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 16:11       ` Octavian Purdila
@ 2008-07-29 17:30         ` Ingo Oeser
  2008-07-29 18:10           ` Octavian Purdila
  2008-07-30  8:51         ` Patrick Ohly
  1 sibling, 1 reply; 14+ messages in thread
From: Ingo Oeser @ 2008-07-29 17:30 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: Stephen Hemminger, Patrick Ohly, netdev

Hi Octavian,

Octavian Purdila schrieb:
> On Tuesday 29 July 2008, Stephen Hemminger wrote:
> >
> > In my sky2 sample code, I took a different approach:
> >  1. Why have HW timestamps different than existing timestamps? If you
> >     just use existing timestamp, no socket API is needed.
> 
> I agree that is a much better approach if you are ok with the variance 
> introduced by synchronizing the HW timestamps with the CPU clock.

Just convert your hardware specific cookie you have into the nanoseconds
resolution timstamp in skb->tstamp just before calling netif_rx() 
or net_receive_skb().  

These functions will not touch it, as long as they never see a ZERO 
value there. A ZERO value in tstamp.tv64 means, we have no timestamp.

The difference to CPU clock doesn't matter. 
That compensation can be done by your driver or application at will.
For mainline, compensation in driver might be preferred.

As long as it is montonic increasing and related to time in any way, 
you could put anything there, I think :-)

The timestamp in nanoseconds will be submitted as CMSG().
see net/socket.c "put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);"

Enable it with this code fragment in user space:

	int one = 1;
	setsockopt(mysok, SOL_SOCKET, SO_TIMESTAMPNS, &one);

In the kernel the call chain is: raw_recvmsg() -> sock_recv_timestamp() -> __sock_recv_timestamp()
__sock_recv_timestamp() is:
void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
    struct sk_buff *skb)
{
    ktime_t kt = skb->tstamp;

    if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Check for nanosecond timestamps
        struct timeval tv;
        /* Race occurred between timestamp enabling and packet
           receiving.  Fill in the current time for now. */
        if (kt.tv64 == 0)
            kt = ktime_get_real();
        skb->tstamp = kt;
        tv = ktime_to_timeval(kt);
        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
    } else {
        struct timespec ts;
        /* Race occurred between timestamp enabling and packet
           receiving.  Fill in the current time for now. */
        if (kt.tv64 == 0)
            kt = ktime_get_real();
        skb->tstamp = kt;
        ts = ktime_to_timespec(kt);
        put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Put nanosecond timestamps into CMSG()
    }
}

> But if you want to precisely measure one-way delays, and if you have the hw 
> timestamp units synchronized across the nodes, then you need the hardware 
> timesetamp. 

Yes, no argument against that. Just your new user space ABI is simply not 
required for RX timestamps. It works out of the box already.



Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 17:30         ` Ingo Oeser
@ 2008-07-29 18:10           ` Octavian Purdila
  2008-07-29 18:27             ` Ingo Oeser
  0 siblings, 1 reply; 14+ messages in thread
From: Octavian Purdila @ 2008-07-29 18:10 UTC (permalink / raw)
  To: Ingo Oeser; +Cc: Stephen Hemminger, Patrick Ohly, netdev

On Tuesday 29 July 2008, Ingo Oeser wrote:
> Hi Octavian,
>

Hi Ingo,

> Just convert your hardware specific cookie you have into the nanoseconds
> resolution timstamp in skb->tstamp just before calling netif_rx()
> or net_receive_skb().
>
> These functions will not touch it, as long as they never see a ZERO
> value there. A ZERO value in tstamp.tv64 means, we have no timestamp.
>
> The difference to CPU clock doesn't matter.
> That compensation can be done by your driver or application at will.
> For mainline, compensation in driver might be preferred.
>
> As long as it is montonic increasing and related to time in any way,
> you could put anything there, I think :-)
>

In here:

net/netfilter/xt_time.c

it seems that the skb->tstamp needs to be CPU time. 

Frankly I don't care about that, but the tstamp is also used in other places 
like the IP and TCP code paths and I can't say that I barely understand that 
part :) 

But if it is ok to use any kind of monotonic increasing timestamp, that will 
solve my problem, indeed.

Thanks,
tavi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 18:10           ` Octavian Purdila
@ 2008-07-29 18:27             ` Ingo Oeser
  0 siblings, 0 replies; 14+ messages in thread
From: Ingo Oeser @ 2008-07-29 18:27 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: Stephen Hemminger, Patrick Ohly, netdev

Hi Octavian,

Octavian Purdila schrieb:
> On Tuesday 29 July 2008, Ingo Oeser wrote:
> In here:
> 
> net/netfilter/xt_time.c
> 
> it seems that the skb->tstamp needs to be CPU time. 

Yeah, but not being off more than half a second should be ok
even for that obscure use case. 

If your objective is to measure time and relate it to wall clock time, 
your host clock should be close to that using NTP without any problems :-)

> Frankly I don't care about that, but the tstamp is also used in other places 
> like the IP and TCP code paths and I can't say that I barely understand that 
> part :) 
> 
> But if it is ok to use any kind of monotonic increasing timestamp, that will 
> solve my problem, indeed.

Great! :-)


Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 16:11       ` Octavian Purdila
  2008-07-29 17:30         ` Ingo Oeser
@ 2008-07-30  8:51         ` Patrick Ohly
  2008-07-30  9:34           ` Ingo Oeser
  1 sibling, 1 reply; 14+ messages in thread
From: Patrick Ohly @ 2008-07-30  8:51 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: Stephen Hemminger, netdev, Ingo Oeser

On Tue, 2008-07-29 at 19:11 +0300, Octavian Purdila wrote:
> On Tuesday 29 July 2008, Stephen Hemminger wrote:
> >
> > In my sky2 sample code, I took a different approach:
> >  1. Why have HW timestamps different than existing timestamps? If you
> >     just use existing timestamp, no socket API is needed.
> 
> I agree that is a much better approach if you are ok with the variance 
> introduced by synchronizing the HW timestamps with the CPU clock.
> 
> But if you want to precisely measure one-way delays, and if you have the hw 
> timestamp units synchronized across the nodes, then you need the hardware 
> timesetamp. 
> 
> Or maybe I am stuck on this idea because of doing things this way for a long 
> time and there is a better solution?

No, I think you have a valid case here. Synchronizing the clocks inside
the NICs can be useful:
      * for analysing the accuracy of clock synchronization (that's what
        I have done in the PTP paper that I mentioned earlier)
      * for highly accurate network measurements (your use case)
      * advanced usages of the NIC clock (for example, Intel's new NIC
        can use the NIC clock as trigger for hardware events - this may
        be useful for certain embedded hardware designs)

Any kind of transformation into system time gets in the way when trying
to do that. Having said that, I'm fine with a solution which only
exposes transformed hardware time stamps to the user space app. My goal
is the IMHO more common system time synchronization and therefore direct
access to raw hardware time stamps is not needed.

But there's one caveat: the fallback of returning software time stamps
to the app if no hardware time stamp is available will affect accuracy
of system time synchronization because time stamps will jump when
switching from one method to the other. That is primarily because a) the
time stamp is generated at a different point in the packet processing
pipeline but also because b) the NIC->system time conversion will
introduce a certain error, which is not present in time stamps taken in
software directly.

I had tried to use the system time time as fallback in the patched PTPd
when no hardware time stamp could be taken (which happened occassionally
under load). It didn't work well at all, simply ignoring the packet and
resending another Sync or DelayRequest was much better.

For received packets, the fallback would happen in
__sock_recv_timestamp(), as Ingo pointed out. My suggestion is to
introduce a new socket flag which indicates whether the app wants the
fallback or not.

For sent packets, Tavi's proposal is that "If timestamping is requested
and no timestamping is done in the device driver (potentially this may
use hardware timestamping), it will be done in software after the
device's start_hard_xmit routine." Perhaps the parameter of the new
SO_TXTIMESTAMP can be a multi-value? 0 = no TX time stamps, 1 = TX time
stamp in hardware if possible with software as fallback, 2 = only
hardware TX time stamps. Either that, or use the same new socket flag as
for RX time stamps.

-- 
Best Regards, Patrick Ohly

The content of this message is my personal opinion only and although
I am an employee of Intel, the statements I make here in no way
represent Intel's position on the issue, nor am I authorized to speak
on behalf of Intel on this matter.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-30  8:51         ` Patrick Ohly
@ 2008-07-30  9:34           ` Ingo Oeser
  0 siblings, 0 replies; 14+ messages in thread
From: Ingo Oeser @ 2008-07-30  9:34 UTC (permalink / raw)
  To: Patrick Ohly; +Cc: Octavian Purdila, Stephen Hemminger, netdev

Hi Patrick,

Patrick Ohly schrieb:
> Perhaps the parameter of the new
> SO_TXTIMESTAMP can be a multi-value? 0 = no TX time stamps, 1 = TX time
> stamp in hardware if possible with software as fallback, 2 = only
> hardware TX time stamps. Either that, or use the same new socket flag as
> for RX time stamps.

Perhaps we can make it a bitmask (please choose a good prefix instead of XXX):

enum {
	XXX_SOFTWARE_TX_TIMESTAMP = (1 << 0),
	XXX_SOFTWARE_RX_TIMESTAMP = (1 << 1),
	XXX_DRIVER_TX_TIMESTAMP	  = (1 << 2),
	XXX_DRIVER_RX_TIMESTAMP	  = (1 << 3),
};

That way we can express everything required so far and have some bits left 
to define precision, system time correlation and more.


Best Regards

Ingo Oeser

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-29 15:49     ` Octavian Purdila
@ 2008-07-30  9:35       ` Patrick Ohly
  2008-07-30 13:38         ` Octavian Purdila
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick Ohly @ 2008-07-30  9:35 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: netdev

On Tue, 2008-07-29 at 18:49 +0300, Octavian Purdila wrote:
> > Is the driver expected to check 
> > the socket flags whenever it gets a chance?
> >
> 
> If/when the driver chooses, it will start using hardware timestamps and the 
> hardware timestamp will always (or when possible) be stored in skb->tstamp.

It's the app which chooses when to enable the feature, so we need a way
to communicate that.

> > A simple on/off flag is not sufficient, either: for example, the Intel
> > 82576 chip only has one RX register that is locked until read by the
> > driver. When time stamping all incoming packets, relevant time stamps
> > may get lost under high load. The hardware can be configured to only
> > time stamp packets of interest, which helps considerably. 
> 
> Ok, I see... How about adding a new SIOCSHWTSTAMPFILTER ioctl:
> 
> #define HWTSTAMP_FILTER_PTP_L2 0x01
> #define HWTSTAMP_FILTER_PTP_L4 0x02
> ...
> struct hwtstamp_filter {
> 	char type;
> };
> 
> If needed we could later expand hwtstamp_filter to include ether_types, 
> ip_types, udp/tcp ports, etc.

Yes, that'll work. Regarding the design of the ioctl() call and its
parameter, how do we preserve backwards compatibility as new fields get
added?

The initial list of defines could be:

/** time stamp no incoming packet at all */
#define HWTSTAMP_FILTER_NONE 0x00

/** time stamp any incoming packet */
#define HWTSTAMP_FILTER_ALL 0x01

/** PTP v1, UDP, any kind of event packet */
#define HWTSTAMP_FILTER_PTP_V1_L4_EVENT 0x02
/** PTP v1, UDP, Sync packet */
#define HWTSTAMP_FILTER_PTP_V1_L4_SYNC 0x03
/** PTP v1, UDP, Delay_req packet */
#define HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ 0x04

/** PTP v2, UDP, any kind of event packet */
#define HWTSTAMP_FILTER_PTP_V2_L4_EVENT 0x05
/** PTP v2, UDP, Sync packet */
#define HWTSTAMP_FILTER_PTP_V2_L4_SYNC 0x06
/** PTP v2, UDP, Delay_req packet */
#define HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ 0x07

/** 802.AS1, Ethernet, any kind of event packet */
#define HWTSTAMP_FILTER_PTP_V2_L2_EVENT 0x08
/** 802.AS1, Ethernet, Sync packet */
#define HWTSTAMP_FILTER_PTP_V2_L2_SYNC 0x09
/** 802.AS1, Ethernet, Delay_req packet */
#define HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ 0x0A

/** PTP v2/802.AS1, any layer, any kind of event packet */
#define HWTSTAMP_FILTER_PTP_V2_EVENT 0x0B
/** PTP v2/802.AS1, any layer, Sync packet */
#define HWTSTAMP_FILTER_PTP_V2_SYNC 0x0C
/** PTP v2/802.AS1, any layer, Delay_req packet */
#define HWTSTAMP_FILTER_PTP_V2_DELAY_REQ 0x0D

The "any event packet" is useful for "dumb" PTP daemons which want to
avoid reconfiguring the driver as they switch from master to slave or
vice versa. In V1 there was no mapping to L2, so that is missing from
the list.

I prefer an enumeration over flags because by design, only valid
combinations are possible.

> > It may also be 
> > important to know for the application whether the hardware is really
> > capable of delivering what it is asked for.
> >
> 
> I am not sure if I understood this, but the ioctl return code should do it, 
> right?

Correct. The semantic of the call would be that it's a hint to the
driver what the app wants. It must time stamp the indicated packets, but
if it is not capable of filtering exactly as requested, then it is free
to also time stamp additional ones. If it cannot implement the requested
functionality, EINVAL is returned.

The app is encourage to be as specific as possible. For example, the
HWTSTAMP_FILTER_PTP_V1_L4_EVENT mode is not supported by the new Intel
NIC. The daemon has to select either HWTSTAMP_FILTER_PTP_V1_L4_SYNC or
HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ, as needed.

> > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > > index 299ec4b..f19ed43 100644
> > > --- a/include/linux/skbuff.h
> > > +++ b/include/linux/skbuff.h
> > > @@ -199,7 +199,8 @@ typedef unsigned char *sk_buff_data_t;
> > >   *	@next: Next buffer in list
> > >   *	@prev: Previous buffer in list
> > >   *	@sk: Socket we are owned by
> > > - *	@tstamp: Time we arrived
> > > + *	@tstamp: Time we arrived; representation might be hardware specific,
> > > do + *	        not access directly but via skb_get_tstamp
> >
> > Given that the semantic of the "tstamp" member has changed and any
> > existing code which still accesses it directly is broken, should the
> > member perhaps be renamed to trigger compiler errors in such a case?
> > Just a thought.
> 
> I am ok with that, but I don't know if this is an acceptable practice :)

Neither do I. Any comments from the experts? However, if we follow
Ingo's proposal that change wouldn't be necessary anyway.

> BTW, the TX timestamps patch I've sent yesterday is also very closely related 
> to PTP, and since you have experience with PTP I am wondering how the 
> proposed interface looks to you.

See my comments regarding the software time stamping fallback in the
other mail that I just sent. I'll also have to look at the patch in more
detail.

-- 
Best Regards, Patrick Ohly

The content of this message is my personal opinion only and although
I am an employee of Intel, the statements I make here in no way
represent Intel's position on the issue, nor am I authorized to speak
on behalf of Intel on this matter.


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-30  9:35       ` Patrick Ohly
@ 2008-07-30 13:38         ` Octavian Purdila
  2008-07-30 14:00           ` Patrick Ohly
  0 siblings, 1 reply; 14+ messages in thread
From: Octavian Purdila @ 2008-07-30 13:38 UTC (permalink / raw)
  To: Patrick Ohly; +Cc: netdev

On Wednesday 30 July 2008, Patrick Ohly wrote:

> It's the app which chooses when to enable the feature, so we need a way
> to communicate that.

Ok, perhaps a new SIOCSHWTSTAMP ioctl? (or maybe we can piggy back on the 
filter one with the HWTSTAMP_FILTER_NONE you proposed?)

> > Ok, I see... How about adding a new SIOCSHWTSTAMPFILTER ioctl:
> >
> > #define HWTSTAMP_FILTER_PTP_L2 0x01
> > #define HWTSTAMP_FILTER_PTP_L4 0x02
> > ...
> > struct hwtstamp_filter {
> > 	char type;
> > };
> >
> > If needed we could later expand hwtstamp_filter to include ether_types,
> > ip_types, udp/tcp ports, etc.
>
> Yes, that'll work. Regarding the design of the ioctl() call and its
> parameter, how do we preserve backwards compatibility as new fields get
> added?
>

I think we could simply add more fields in the structure since they will only 
be used with new commands. If the app know about the new command than it will 
know to use the new structure layout.

> The initial list of defines could be:
<snip>
> I prefer an enumeration over flags because by design, only valid
> combinations are possible.
>

I agree.

Thanks,
tavi

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC][PATCH 1/1] net: support for hardware timestamping
  2008-07-30 13:38         ` Octavian Purdila
@ 2008-07-30 14:00           ` Patrick Ohly
  0 siblings, 0 replies; 14+ messages in thread
From: Patrick Ohly @ 2008-07-30 14:00 UTC (permalink / raw)
  To: Octavian Purdila; +Cc: netdev

On Wed, 2008-07-30 at 16:38 +0300, Octavian Purdila wrote:
> On Wednesday 30 July 2008, Patrick Ohly wrote:
> 
> > It's the app which chooses when to enable the feature, so we need a way
> > to communicate that.
> 
> Ok, perhaps a new SIOCSHWTSTAMP ioctl? (or maybe we can piggy back on the 
> filter one with the HWTSTAMP_FILTER_NONE you proposed?)

I'm fine with just one ioctl and would prefer a generic SIOCSHWTSTAMP. I
would like to go even one step further than discussed so far and also
use it to enable TX time stamping. The reasoning is that the driver
might have to do some work (like initializing the NIC clock, setting up
NIC/system time comparison) both for TX and RX.

That means that we need two fields and defines for TX. Adding new fields
later on works, but having to support multiple different sizes in the
driver could get complicated, so I suggest to add at least a (currently
unused) flag field. Not sure what the usual convention is regarding the
type fields - char or int?

struct hwtstamp_config {
    char tx_type;
    char rx_filter_type;
    int flags;
};

/**
 * no outgoing packet will need hardware time stamping;
 * should a packet arrive which asks for it, no hardware
 * time stamping will be done
 */
#define HWTSTAMP_TX_OFF 0

/**
 * enables hardware time stamping for outgoing packets;
 * the sender of the packet decides which are to be 
 * time stamped
 */
#define HWSTAMP_TX_ON 1

-- 
Best Regards, Patrick Ohly

The content of this message is my personal opinion only and although
I am an employee of Intel, the statements I make here in no way
represent Intel's position on the issue, nor am I authorized to speak
on behalf of Intel on this matter.


^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2008-07-30 14:02 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-07-29  0:07 [RFC][PATCH 0/1] net: support for hardware timestamps Octavian Purdila
2008-07-29  0:08 ` [RFC][PATCH 1/1] net: support for hardware timestamping Octavian Purdila
2008-07-29 14:52   ` Patrick Ohly
2008-07-29 15:49     ` Octavian Purdila
2008-07-30  9:35       ` Patrick Ohly
2008-07-30 13:38         ` Octavian Purdila
2008-07-30 14:00           ` Patrick Ohly
2008-07-29 15:54     ` Stephen Hemminger
2008-07-29 16:11       ` Octavian Purdila
2008-07-29 17:30         ` Ingo Oeser
2008-07-29 18:10           ` Octavian Purdila
2008-07-29 18:27             ` Ingo Oeser
2008-07-30  8:51         ` Patrick Ohly
2008-07-30  9:34           ` Ingo Oeser

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).