[PATCH] Generalize socket rx gap / receive queue overflow cmsg

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH] Generalize socket rx gap / receive queue overflow cmsg
@ 2009-10-07 18:08 Neil Horman
  2009-10-08  1:05 ` Eric Dumazet
                   ` (3 more replies)
  0 siblings, 4 replies; 15+ messages in thread
From: Neil Horman @ 2009-10-07 18:08 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, davem, socketcan, nhorman

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch.  It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted.

Neil

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991c..7cde78e 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL		40
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 529a093..b7dafdd 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -69,6 +69,7 @@ struct net;
 #define SOCK_NOSPACE		2
 #define SOCK_PASSCRED		3
 #define SOCK_PASSSEC		4
+#define SOCK_RXQ_OVFL		5
 
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23a..8c866b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6a..8bd366f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	skb_len = skb->len;
 
 	skb_queue_tail(&sk->sk_receive_queue, skb);
+	skb->dropcount = atomic_read(&sk->sk_drops);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +703,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			set_bit(SOCK_RXQ_OVFL, &sock->flags);
+		else
+			clear_bit(SOCK_RXQ_OVFL, &sock->flags);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d7ecca0..920ae1e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	skb_set_owner_r(skb, sk);
 	skb->dev = NULL;
 	skb_dst_drop(skb);
@@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 drop_n_acct:
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_drops++;
+	atomic_inc(&sk->sk_drops);
 	spin_unlock(&sk->sk_receive_queue.lock);
 
 drop_n_restore:
diff --git a/net/socket.c b/net/socket.c
index 7565536..ad157a3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 {
 	int err;
 	struct sock_iocb *si = kiocb_to_siocb(iocb);
+	struct sk_buff *skb;
+	int rc;
+	struct sock *sk = sock->sk;
+	unsigned long cpu_flags;
+	__u32 gap = 0;
+	int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
 
 	si->sock = sock;
 	si->scm = NULL;
@@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		return err;
 
-	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+	if (check_drops) {
+		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
+				flags & MSG_DONTWAIT, &err);
+		if (skb) {
+			gap = skb->dropcount;
+			consume_skb(skb);
+		}
+	}
+
+	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
+
+	if (check_drops && (rc > 0))
+		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
+
+	return rc;
 }
 
 int sock_recvmsg(struct socket *sock, struct msghdr *msg,

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
  2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
@ 2009-10-08  1:05 ` Eric Dumazet
  2009-10-08 13:54   ` Neil Horman
  2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2009-10-08  1:05 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7626b6a..8bd366f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  	skb_len = skb->len;
>  


>  	skb_queue_tail(&sk->sk_receive_queue, skb);
> +	skb->dropcount = atomic_read(&sk->sk_drops);

No, skb was given to skb_queue_tail(), you are not allowed to touch it now,
another cpu might already consume it.

You better do :

struct sk_buff_head *list = &sk->sk_receive_queue;

spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops); // should be done under lock protection
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);



>  
>  	if (!sock_flag(sk, SOCK_DEAD))
>  		sk->sk_data_ready(sk, skb_len);
> @@ -702,6 +703,12 @@ set_rcvbuf:
>  
>  		/* We implement the SO_SNDLOWAT etc to
>  		   not be settable (1003.1g 5.3) */
> +	case SO_RXQ_OVFL:
> +		if (valbool)
> +			set_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		else
> +			clear_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
>  	default:
>  		ret = -ENOPROTOOPT;
>  		break;
> @@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
>  		v.val = sk->sk_mark;
>  		break;
>  
> +	case SO_RXQ_OVFL:
> +		v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
> +
>  	default:
>  		return -ENOPROTOOPT;
>  	}
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d7ecca0..920ae1e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  	if (pskb_trim(skb, snaplen))
>  		goto drop_n_acct;
>  

> +	skb->dropcount = atomic_read(&sk->sk_drops);
This should be done a little bit later, right before "__skb_queue_tail(&sk->sk_receive_queue, skb);"

>  	skb_set_owner_r(skb, sk);
>  	skb->dev = NULL;
>  	skb_dst_drop(skb);
> @@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  drop_n_acct:


>  	spin_lock(&sk->sk_receive_queue.lock);
>  	po->stats.tp_drops++;
> +	atomic_inc(&sk->sk_drops);
>  	spin_unlock(&sk->sk_receive_queue.lock);

You could replace this block of four lines with: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

>  
>  drop_n_restore:
> diff --git a/net/socket.c b/net/socket.c
> index 7565536..ad157a3 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  {
>  	int err;
>  	struct sock_iocb *si = kiocb_to_siocb(iocb);
> +	struct sk_buff *skb;
> +	int rc;
> +	struct sock *sk = sock->sk;
> +	unsigned long cpu_flags;
> +	__u32 gap = 0;

> +	int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
>  
>  	si->sock = sock;
>  	si->scm = NULL;
> @@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  	if (err)
>  		return err;
>  
> -	return sock->ops->recvmsg(iocb, sock, msg, size, flags);




> +	if (check_drops) {
> +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> +				flags & MSG_DONTWAIT, &err);

	Ouch, this is too expensive, please find another way :)

> +		if (skb) {
> +			gap = skb->dropcount;
> +			consume_skb(skb);
> +		}
> +	}
> +
> +	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
> +
> +	if (check_drops && (rc > 0))

		&& gap != 0

> +		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
> +


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
  2009-10-08  1:05 ` Eric Dumazet
@ 2009-10-08 13:54   ` Neil Horman
  2009-10-08 14:45     ` Eric Dumazet
  0 siblings, 1 reply; 15+ messages in thread
From: Neil Horman @ 2009-10-08 13:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem, socketcan

> 
> > +	if (check_drops) {
> > +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> > +				flags & MSG_DONTWAIT, &err);
> 
> 	Ouch, this is too expensive, please find another way :)
> 
> > +		if (skb) {
> > +			gap = skb->dropcount;
> > +			consume_skb(skb);
> > +		}
> > +	}
> > +
I'm not sure that I see the expense here, and what expense there is, I don't see
how it is avoidable.  In order to do this reporting at the socket level, we need to
look at the skb at the head of the receive queue.  But we need to do so in a way
that's consistent with the flags being passed in (i.e. if this is a blocking
socket, we need to block here until something is available to read).  Then it's
just an atomic_inc on skb->users, followed by a dec in the consume_skb.  I could
implement the logic for DONTWAIT myself, and skip the atomic_inc/dec, but I'm
not sure that's much of a savings.  If you have another thought, I'm certainly
open to it.

Neil

> > +	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
> > +
> > +	if (check_drops && (rc > 0))
> 
> 		&& gap != 0
> 
> > +		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
> > +
> 
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
  2009-10-08 13:54   ` Neil Horman
@ 2009-10-08 14:45     ` Eric Dumazet
  2009-10-08 17:20       ` Neil Horman
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2009-10-08 14:45 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :
>>> +	if (check_drops) {
>>> +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
>>> +				flags & MSG_DONTWAIT, &err);
>> 	Ouch, this is too expensive, please find another way :)
>>
>>> +		if (skb) {
>>> +			gap = skb->dropcount;
>>> +			consume_skb(skb);
>>> +		}
>>> +	}
>>> +
> I'm not sure that I see the expense here, and what expense there is, I don't see
> how it avoidable.  In order to do this reporting at the socket level, we need to
> look at the skb at the head of the receive queue.  But we need to do so in a way
> thats consistent with the flags being passed in (i.e. if this is a blocking
> socket, we need to block here until something is available to read).  Then its
> just an atomic_inc on skb->users, followed by a dec in the consume_skb.  I could
> implement the logic for DONTWAIT myself, and skip the atomic_inc/dec, but I'm
> not sure thats much of a savings.  If you have another thought, I'm certainly
> open to it.

The expense is a lot of atomic ops. You forgot the lock, so that's four atomic ops.

You can do all this with no extra atomics.

All you need is some function with (struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
triplet.

hint : sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)

Could be renamed to something else if you want...

sock_recv_ts_or_drops() or whatever

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
  2009-10-08 14:45     ` Eric Dumazet
@ 2009-10-08 17:20       ` Neil Horman
  0 siblings, 0 replies; 15+ messages in thread
From: Neil Horman @ 2009-10-08 17:20 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem, socketcan

On Thu, Oct 08, 2009 at 04:45:48PM +0200, Eric Dumazet wrote:
> Neil Horman a écrit :
> >>> +	if (check_drops) {
> >>> +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> >>> +				flags & MSG_DONTWAIT, &err);
> >> 	Ouch, this is too expensive, please find another way :)
> >>
> >>> +		if (skb) {
> >>> +			gap = skb->dropcount;
> >>> +			consume_skb(skb);
> >>> +		}
> >>> +	}
> >>> +
> > I'm not sure that I see the expense here, and what expense there is, I don't see
> > how it avoidable.  In order to do this reporting at the socket level, we need to
> > look at the skb at the head of the receive queue.  But we need to do so in a way
> > thats consistent with the flags being passed in (i.e. if this is a blocking
> > socket, we need to block here until something is available to read).  Then its
> > just an atomic_inc on skb->users, followed by a dec in the consume_skb.  I could
> > implement the logic for DONTWAIT myself, and skip the atomic_inc/dec, but I'm
> > not sure thats much of a savings.  If you have another thought, I'm certainly
> > open to it.
> 
> The expense is a lot of atomic ops. You forgot the lock, so thats four atomic ops.
> 
> You can do all this with no extra atomics.
> 
> All you need is some function with (struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
> triplet.
> 
> hint : sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
> 
> Could be renamed to something else if you want...
> 
> sock_recv_ts_or_drops() or whatever
Ok, but that will require moving the flag that we're triggering this on down
into the sock structure, and not doing the check up in __sock_recvmsg, but I
suppose that's fine.  Ok, I'll repost soon.  Thanks!
Neil

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2)
  2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
  2009-10-08  1:05 ` Eric Dumazet
@ 2009-10-09 19:35 ` Neil Horman
  2009-10-09 21:31   ` Eric Dumazet
  2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
  2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
  3 siblings, 1 reply; 15+ messages in thread
From: Neil Horman @ 2009-10-09 19:35 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, davem, socketcan, nhorman

Ok, take two of this patch, taking in Eric's notes:

Change Notes:

1) Locking on dropcount cleaned up

2) Support for reading of dropcount moved to a lower level support function
(sock_recv_ts_and_drops, modeled after sock_recv_timestamp).  This should make
this work a good deal faster

3) Socket flags moved to sk->sk_flags structure in support of (2)

Works well for me.


========================================================================

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch.  It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/asm-generic/socket.h |    1 +
 include/linux/skbuff.h       |    6 ++++--
 include/net/sock.h           |   13 +++++++++++++
 net/atm/common.c             |    2 +-
 net/bluetooth/af_bluetooth.c |    2 +-
 net/bluetooth/rfcomm/sock.c  |    2 +-
 net/can/bcm.c                |    2 +-
 net/can/raw.c                |    2 +-
 net/core/sock.c              |   17 ++++++++++++++++-
 net/ieee802154/dgram.c       |    2 +-
 net/ieee802154/raw.c         |    2 +-
 net/ipv4/raw.c               |    2 +-
 net/ipv4/udp.c               |    2 +-
 net/ipv6/raw.c               |    2 +-
 net/ipv6/udp.c               |    2 +-
 net/key/af_key.c             |    2 +-
 net/packet/af_packet.c       |    7 +++----
 net/rxrpc/ar-recvmsg.c       |    2 +-
 net/sctp/socket.c            |    2 +-
 net/socket.c                 |    7 +++++++
 20 files changed, 58 insertions(+), 21 deletions(-)

diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991c..9a6115e 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index df7b23a..8c866b5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 98398bd..ae48d99 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -505,6 +505,7 @@ enum sock_flags {
 	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 	SOCK_FASYNC, /* fasync() active */
+	SOCK_RXQ_OVFL,
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1493,6 +1494,18 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
+extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb);
+
+static __inline__ void
+sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+{
+	sock_recv_timestamp(msg, sk, skb);
+
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+		__sock_recv_ts_and_drops(msg, sk, skb);
+}
+
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @msg:	outgoing packet
diff --git a/net/atm/common.c b/net/atm/common.c
index 950bd16..d61e051 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -496,7 +496,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (error)
 		return error;
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
 	atm_return(vcc, skb->truesize);
 	skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 1f6e49c..399e59c 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -257,7 +257,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err == 0)
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 	skb_free_datagram(sk, skb);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index c707865..d3bfc1b 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -703,7 +703,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += chunk;
 		size   -= chunk;
 
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 		if (!(flags & MSG_PEEK)) {
 			atomic_sub(chunk, &sk->sk_rmem_alloc);
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 597da4f..2f47039 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1534,7 +1534,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/can/raw.c b/net/can/raw.c
index b5e8979..962fc9f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -702,7 +702,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/core/sock.c b/net/core/sock.c
index 7626b6a..0897311 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -276,6 +276,8 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int err = 0;
 	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
 
 	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
@@ -305,7 +307,10 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +707,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			sock_set_flag(sk, SOCK_RXQ_OVFL);
+		else
+			sock_reset_flag(sk, SOCK_RXQ_OVFL);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +912,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index a413b1b..25ad956 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -303,7 +303,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
index 30e74ee..769c8d1 100644
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -191,7 +191,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 757c917..f18172b 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6ec6a8a..bb96eee 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,7 +951,7 @@ try_again:
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4f24570..d8375bc 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -497,7 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			sin6->sin6_scope_id = IP6CB(skb)->iif;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
 		datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c6a303e..b51ee64 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -252,7 +252,7 @@ try_again:
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
index c078ae6..472f659 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3606,7 +3606,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	err = (flags & MSG_TRUNC) ? skb->len : copied;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f87ed48..bf3a295 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -627,15 +627,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
 	return 0;
 
 drop_n_acct:
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.tp_drops++;
-	spin_unlock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -1478,7 +1477,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index a39bf97..60c2b94 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -146,7 +146,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 				memcpy(msg->msg_name,
 				       &call->conn->trans->peer->srx,
 				       sizeof(call->conn->trans->peer->srx));
-			sock_recv_timestamp(msg, &rx->sk, skb);
+			sock_recv_ts_and_drops(msg, &rx->sk, skb);
 		}
 
 		/* receive the message */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index c8d0575..0970e92 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,7 +1958,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
diff --git a/net/socket.c b/net/socket.c
index d53ad11..c82146c 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -668,6 +668,13 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount);
+}
+EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2)
  2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
@ 2009-10-09 21:31   ` Eric Dumazet
  2009-10-09 23:21     ` Neil Horman
  0 siblings, 1 reply; 15+ messages in thread
From: Eric Dumazet @ 2009-10-09 21:31 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :

>  
> +extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
> +	struct sk_buff *skb);

Surely you meant __sock_recv_drops() ? It only deals with drops.


> +	case SO_RXQ_OVFL:
> +		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
> +		break;
> +

Hmm, I advise using v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
So that application gets 0 or 1, not 0 or some big value.
It's better because it allows us to change internal SOCK_RXQ_OVFL if necessary in the future.

>  drop_n_acct:
> -	spin_lock(&sk->sk_receive_queue.lock);
> -	po->stats.tp_drops++;
> -	spin_unlock(&sk->sk_receive_queue.lock);
> +	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

Yes :)

>  EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
>  
> +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
> +	struct sk_buff *skb)
> +{
> +	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount);
> +}
> +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
> +

Just change the name.

And is it really too large to be inlined ?

On the contrary, sock_recv_timestamp() is so large that I suspect
your sock_recv_ts_and_drops should *not* be inlined, and include inlined versions only :

I suggest something more orthogonal like :

void inline sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
			 sizeof(__u32), &skb->dropcount);
}

void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
{
	sock_recv_timestamp(msg, sk, skb); // inlined
	sock_recv_drops(msg, sk, skb); // inlined
}
EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops)


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2)
  2009-10-09 21:31   ` Eric Dumazet
@ 2009-10-09 23:21     ` Neil Horman
  0 siblings, 0 replies; 15+ messages in thread
From: Neil Horman @ 2009-10-09 23:21 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, davem, socketcan

On Fri, Oct 09, 2009 at 11:31:26PM +0200, Eric Dumazet wrote:
> Neil Horman a écrit :
> 
> >  
> > +extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
> > +	struct sk_buff *skb);
> 
> Surely you meant __sock_recv_drops() ? It only deals with drops.
> 
No, I certainly meant both.  The definition clearly handles both the timestamp
cmsg and the drops cmsg.  That way we don't need to make two calls in the
receive path for these.

> 
> > +	case SO_RXQ_OVFL:
> > +		v.val = sock_flag(sk, SOCK_RXQ_OVFL);
> > +		break;
> > +
> 
> Hmm, I advise to use v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
> So that application gets 0 or 1, not 0 or some big value.
> Its better because it allows us to change internal SOCK_RXQ_OVFL if necessary in the future.
> 
I don't really see any difference; sock_flag is simply a wrapper around
test_bit.  I can change it if you really need, but it just looks like additional
operations to me.

> >  drop_n_acct:
> > -	spin_lock(&sk->sk_receive_queue.lock);
> > -	po->stats.tp_drops++;
> > -	spin_unlock(&sk->sk_receive_queue.lock);
> > +	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
> 
> Yes :)
> 
> >  EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
> >  
> > +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
> > +	struct sk_buff *skb)
> > +{
> > +	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount);
> > +}
> > +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
> > +
> 
> Just change the name.
> 
No.  I'm differentiating from sock_recv_timestamp here, as I'm concerned about
the case in which we enqueue to sk_error_queue.  For those cases, in which
recvmsg is called with MSG_ERRQUEUE, I don't think it's right to apply a cmsg
based on skb->dropcount, since the frame may have been from a tx path, or may
not have had dropcount set in the first place.  The timestamp is still recorded
there, but I don't think we should mark the dropcount.

> And is it really too large to be inlined ?
> 
No, I was just following the style of sock_recv_timestamp.  

> In the contrary, sock_recv_timestamp() is so large that I suspect
> your sock_recv_ts_and_drops should *not* be inlined, and include inlined versions only :
> 
> I suggest something more orthogonal like :
> 
> void inline sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
> {
> 	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
> 		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
> 			 sizeof(__u32), &skb->dropcount);
> }
> 
> void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
> {
> 	sock_recv_timestamp(msg, sk, skb); // inlined
> 	sock_recv_drops(msg, sk, skb); // inlined
> }
> EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops)
Fine.

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3)
  2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
  2009-10-08  1:05 ` Eric Dumazet
  2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
@ 2009-10-09 23:56 ` Neil Horman
  2009-10-10  4:59   ` Eric Dumazet
  2009-10-10  5:12   ` Eric Dumazet
  2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
  3 siblings, 2 replies; 15+ messages in thread
From: Neil Horman @ 2009-10-09 23:56 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, davem, socketcan, nhorman

Ok, take 3 with Eric's new notes

Change Notes:

1) Modified inlining of sock_recv_ts_and_drops to be more efficient

2) Modify getsockopt for SO_RXQ_OVFL to guarantee only a 1 or 0 return

=============================================================


Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch.  It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt-in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>


 include/asm-generic/socket.h |    1 +
 include/linux/skbuff.h       |    6 ++++--
 include/net/sock.h           |    3 +++
 net/atm/common.c             |    2 +-
 net/bluetooth/af_bluetooth.c |    2 +-
 net/bluetooth/rfcomm/sock.c  |    2 +-
 net/can/bcm.c                |    2 +-
 net/can/raw.c                |    2 +-
 net/core/sock.c              |   17 ++++++++++++++++-
 net/ieee802154/dgram.c       |    2 +-
 net/ieee802154/raw.c         |    2 +-
 net/ipv4/raw.c               |    2 +-
 net/ipv4/udp.c               |    2 +-
 net/ipv6/raw.c               |    2 +-
 net/ipv6/udp.c               |    2 +-
 net/key/af_key.c             |    2 +-
 net/packet/af_packet.c       |    7 +++----
 net/rxrpc/ar-recvmsg.c       |    2 +-
 net/sctp/socket.c            |    2 +-
 net/socket.c                 |   16 ++++++++++++++++
 20 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991c..9a6115e 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@ struct sk_buff {
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
diff --git a/include/net/sock.h b/include/net/sock.h
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -505,6 +505,7 @@ enum sock_flags {
 	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 	SOCK_FASYNC, /* fasync() active */
+	SOCK_RXQ_OVFL,
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1493,6 +1494,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		sk->sk_stamp = kt;
 }
 
+extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @msg:	outgoing packet
diff --git a/net/atm/common.c b/net/atm/common.c
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -496,7 +496,7 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 	error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (error)
 		return error;
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
 	atm_return(vcc, skb->truesize);
 	skb_free_datagram(sk, skb);
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -257,7 +257,7 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err == 0)
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 	skb_free_datagram(sk, skb);
 
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -703,7 +703,7 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 		copied += chunk;
 		size   -= chunk;
 
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 		if (!(flags & MSG_PEEK)) {
 			atomic_sub(chunk, &sk->sk_rmem_alloc);
diff --git a/net/can/bcm.c b/net/can/bcm.c
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1534,7 +1534,7 @@ static int bcm_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/can/raw.c b/net/can/raw.c
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -702,7 +702,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
diff --git a/net/core/sock.c b/net/core/sock.c
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -276,6 +276,8 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
 	int err = 0;
 	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
 
 	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
@@ -305,7 +307,10 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +707,12 @@ set_rcvbuf:
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			sock_set_flag(sk, SOCK_RXQ_OVFL);
+		else
+			sock_reset_flag(sk, SOCK_RXQ_OVFL);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +912,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -303,7 +303,7 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ieee802154/raw.c b/net/ieee802154/raw.c
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -191,7 +191,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,7 +951,7 @@ try_again:
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -497,7 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 			sin6->sin6_scope_id = IP6CB(skb)->iif;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
 		datagram_recv_ctl(sk, msg, skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -252,7 +252,7 @@ try_again:
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
diff --git a/net/key/af_key.c b/net/key/af_key.c
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3606,7 +3606,7 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	err = (flags & MSG_TRUNC) ? skb->len : copied;
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -627,15 +627,14 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
 	return 0;
 
 drop_n_acct:
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.tp_drops++;
-	spin_unlock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -1478,7 +1477,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -146,7 +146,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 				memcpy(msg->msg_name,
 				       &call->conn->trans->peer->srx,
 				       sizeof(call->conn->trans->peer->srx));
-			sock_recv_timestamp(msg, &rx->sk, skb);
+			sock_recv_ts_and_drops(msg, &rx->sk, skb);
 		}
 
 		/* receive the message */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,7 +1958,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
diff --git a/net/socket.c b/net/socket.c
--- a/net/socket.c
+++ b/net/socket.c
@@ -668,6 +668,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
+			sizeof(__u32), &skb->dropcount);
+}
+
+void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_drops(msg, sk, skb);
+	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount);
+}
+EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3)
  2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
@ 2009-10-10  4:59   ` Eric Dumazet
  2009-10-10  5:12   ` Eric Dumazet
  1 sibling, 0 replies; 15+ messages in thread
From: Eric Dumazet @ 2009-10-10  4:59 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :
> Ok, take 3 with Erics new notes
> 
> Change Notes:
> 
> 1) Modified inlining of sock_recv_ts_and_drops to be more efficient
> 
> 2) modify getsockopt for SO_RXQ_OVFL to gurantee only a 1 or 0 return
> 
> =============================================================
> 
> 
> Create a new socket level option to report number of queue overflows
> 
> Recently I augmented the AF_PACKET protocol to report the number of frames lost
> on the socket receive queue between any two enqueued frames.  This value was
> exported via a SOL_PACKET level cmsg.  AFter I completed that work it was
> requested that this feature be generalized so that any datagram oriented socket
> could make use of this option.  As such I've created this patch, It creates a
> new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
> SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue
> overflowed between any two given frames.  It also augments the AF_PACKET
> protocol to take advantage of this new feature (as it previously did not touch
> sk->sk_drops, which this patch uses to record the overflow count).  Tested
> successfully by me.
> 
> Notes:
> 
> 1) Unlike my previous patch, this patch simply records the sk_drops value, which
> is not a number of drops between packets, but rather a total number of drops.
> Deltas must be computed in user space.
> 
> 2) While this patch currently works with datagram oriented protocols, it will
> also be accepted by non-datagram oriented protocols. I'm not sure if thats
> agreeable to everyone, but my argument in favor of doing so is that, for those
> protocols which aren't applicable to this option, sk_drops will always be zero,
> and reporting no drops on a receive queue that isn't used for those
> non-participating protocols seems reasonable to me.  This also saves us having
> to code in a per-protocol opt in mechanism.
> 
> 3) This applies cleanly to net-next assuming that commit
> 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

I read your patch and found one error (at the very end)

Feel free to resubmit with my

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

>  
> +inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
> +{
> +	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
> +		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
> +			sizeof(__u32), &skb->dropcount);
> +}
> +
> +void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
> +	struct sk_buff *skb)
> +{
> +	sock_recv_timestamp(msg, sk, skb);
> +	sock_recv_drops(msg, sk, skb);


> +	put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount);
It's already part of sock_recv_drops()


> +}
> +EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
> +


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3)
  2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
  2009-10-10  4:59   ` Eric Dumazet
@ 2009-10-10  5:12   ` Eric Dumazet
  1 sibling, 0 replies; 15+ messages in thread
From: Eric Dumazet @ 2009-10-10  5:12 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :
> Ok, take 3 with Erics new notes
> 
> Change Notes:
> 
> 1) Modified inlining of sock_recv_ts_and_drops to be more efficient
> 
> 2) modify getsockopt for SO_RXQ_OVFL to gurantee only a 1 or 0 return
> 
> =============================================================
> 
> 
> Create a new socket level option to report number of queue overflows
> 
> Recently I augmented the AF_PACKET protocol to report the number of frames lost
> on the socket receive queue between any two enqueued frames.  This value was
> exported via a SOL_PACKET level cmsg.  AFter I completed that work it was
> requested that this feature be generalized so that any datagram oriented socket
> could make use of this option.  As such I've created this patch, It creates a
> new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
> SOL_SOCKET level cmsg that reports the nubmer of times the sk_receive_queue
> overflowed between any two given frames.  It also augments the AF_PACKET
> protocol to take advantage of this new feature (as it previously did not touch
> sk->sk_drops, which this patch uses to record the overflow count).  Tested
> successfully by me.
> 
> Notes:

> diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
> --- a/net/ipv4/udp.c
> +++ b/net/ipv4/udp.c
> @@ -951,7 +951,7 @@ try_again:
>  		UDP_INC_STATS_USER(sock_net(sk),
>  				UDP_MIB_INDATAGRAMS, is_udplite);
>  
> -	sock_recv_timestamp(msg, sk, skb);
> +	sock_recv_ts_and_drops(msg, sk, skb);
>  
>  	/* Copy the address. */
>  	if (sin) {


As a followup to my patch about udp_poll(), I realize we don't atomic_inc(&sk->sk_drops)
in the event a packet is dropped because of a bad checksum.

I'll post a fixup once David has reviewed the previous patch.


^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4)
  2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
                   ` (2 preceding siblings ...)
  2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
@ 2009-10-10 12:35 ` Neil Horman
  2009-10-12  4:38   ` Eric Dumazet
  3 siblings, 1 reply; 15+ messages in thread
From: Neil Horman @ 2009-10-10 12:35 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, davem, socketcan, nhorman

Version 4

Change Notes:

1) Remove the superfluous put_cmsg that I missed in the last version

=======================================================================

Create a new socket level option to report number of queue overflows

Recently I augmented the AF_PACKET protocol to report the number of frames lost
on the socket receive queue between any two enqueued frames.  This value was
exported via a SOL_PACKET level cmsg.  After I completed that work it was
requested that this feature be generalized so that any datagram oriented socket
could make use of this option.  As such I've created this patch.  It creates a
new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
overflowed between any two given frames.  It also augments the AF_PACKET
protocol to take advantage of this new feature (as it previously did not touch
sk->sk_drops, which this patch uses to record the overflow count).  Tested
successfully by me.

Notes:

1) Unlike my previous patch, this patch simply records the sk_drops value, which
is not a number of drops between packets, but rather a total number of drops.
Deltas must be computed in user space.

2) While this patch currently works with datagram oriented protocols, it will
also be accepted by non-datagram oriented protocols. I'm not sure if that's
agreeable to everyone, but my argument in favor of doing so is that, for those
protocols which aren't applicable to this option, sk_drops will always be zero,
and reporting no drops on a receive queue that isn't used for those
non-participating protocols seems reasonable to me.  This also saves us having
to code in a per-protocol opt-in mechanism.

3) This applies cleanly to net-next assuming that commit
977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 538991c..9a6115e 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -63,4 +63,5 @@
 #define SO_PROTOCOL		38
 #define SO_DOMAIN		39
 
+#define SO_RXQ_OVFL             40
 #endif /* __ASM_GENERIC_SOCKET_H */
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -389,8 +389,10 @@
 #ifdef CONFIG_NETWORK_SECMARK
 	__u32			secmark;
 #endif
-
-	__u32			mark;
+	union {
+		__u32		mark;
+		__u32		dropcount;
+	};
 
 	__u16			vlan_tci;
 
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -505,6 +505,7 @@
 	SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
 	SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
 	SOCK_FASYNC, /* fasync() active */
+	SOCK_RXQ_OVFL,
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1493,6 +1494,8 @@
 		sk->sk_stamp = kt;
 }
 
+extern void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb);
+
 /**
  * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
  * @msg:	outgoing packet
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -496,7 +496,7 @@
 	error = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (error)
 		return error;
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	pr_debug("RcvM %d -= %d\n", atomic_read(&sk->sk_rmem_alloc), skb->truesize);
 	atm_return(vcc, skb->truesize);
 	skb_free_datagram(sk, skb);
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -257,7 +257,7 @@
 	skb_reset_transport_header(skb);
 	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
 	if (err == 0)
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 	skb_free_datagram(sk, skb);
 
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -703,7 +703,7 @@
 		copied += chunk;
 		size   -= chunk;
 
-		sock_recv_timestamp(msg, sk, skb);
+		sock_recv_ts_and_drops(msg, sk, skb);
 
 		if (!(flags & MSG_PEEK)) {
 			atomic_sub(chunk, &sk->sk_rmem_alloc);
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1534,7 +1534,7 @@
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -702,7 +702,7 @@
 		return err;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name) {
 		msg->msg_namelen = sizeof(struct sockaddr_can);
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -276,6 +276,8 @@
 {
 	int err = 0;
 	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
 
 	/* Cast sk->rcvbuf to unsigned... It's pointless, but reduces
 	   number of warnings when compiling with -W --ANK
@@ -305,7 +307,10 @@
 	 */
 	skb_len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_lock_irqsave(&list->lock, flags);
+	skb->dropcount = atomic_read(&sk->sk_drops);
+	__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
 
 	if (!sock_flag(sk, SOCK_DEAD))
 		sk->sk_data_ready(sk, skb_len);
@@ -702,6 +707,12 @@
 
 		/* We implement the SO_SNDLOWAT etc to
 		   not be settable (1003.1g 5.3) */
+	case SO_RXQ_OVFL:
+		if (valbool)
+			sock_set_flag(sk, SOCK_RXQ_OVFL);
+		else
+			sock_reset_flag(sk, SOCK_RXQ_OVFL);
+		break;
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -901,6 +912,10 @@
 		v.val = sk->sk_mark;
 		break;
 
+	case SO_RXQ_OVFL:
+		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -303,7 +303,7 @@
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
--- a/net/ieee802154/raw.c
+++ b/net/ieee802154/raw.c
@@ -191,7 +191,7 @@
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (flags & MSG_TRUNC)
 		copied = skb->len;
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -682,7 +682,7 @@
 	if (err)
 		goto done;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -951,7 +951,7 @@
 		UDP_INC_STATS_USER(sock_net(sk),
 				UDP_MIB_INDATAGRAMS, is_udplite);
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (sin) {
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -497,7 +497,7 @@
 			sin6->sin6_scope_id = IP6CB(skb)->iif;
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (np->rxopt.all)
 		datagram_recv_ctl(sk, msg, skb);
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -252,7 +252,7 @@
 					UDP_MIB_INDATAGRAMS, is_udplite);
 	}
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	/* Copy the address. */
 	if (msg->msg_name) {
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3606,7 +3606,7 @@
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	err = (flags & MSG_TRUNC) ? skb->len : copied;
 
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -627,15 +627,14 @@
 
 	spin_lock(&sk->sk_receive_queue.lock);
 	po->stats.tp_packets++;
+	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	spin_unlock(&sk->sk_receive_queue.lock);
 	sk->sk_data_ready(sk, skb->len);
 	return 0;
 
 drop_n_acct:
-	spin_lock(&sk->sk_receive_queue.lock);
-	po->stats.tp_drops++;
-	spin_unlock(&sk->sk_receive_queue.lock);
+	po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
 
 drop_n_restore:
 	if (skb_head != skb->data && skb_shared(skb)) {
@@ -1478,7 +1477,7 @@
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 
 	if (msg->msg_name)
 		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -146,7 +146,7 @@
 				memcpy(msg->msg_name,
 				       &call->conn->trans->peer->srx,
 				       sizeof(call->conn->trans->peer->srx));
-			sock_recv_timestamp(msg, &rx->sk, skb);
+			sock_recv_ts_and_drops(msg, &rx->sk, skb);
 		}
 
 		/* receive the message */
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1958,7 +1958,7 @@
 	if (err)
 		goto out_free;
 
-	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_ts_and_drops(msg, sk, skb);
 	if (sctp_ulpevent_is_notification(event)) {
 		msg->msg_flags |= MSG_NOTIFICATION;
 		sp->pf->event_msgname(event, msg->msg_name, addr_len);
--- a/net/socket.c
+++ b/net/socket.c
@@ -668,6 +668,21 @@
 
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
+{
+	if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount)
+		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL,
+			sizeof(__u32), &skb->dropcount);
+}
+
+void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	sock_recv_timestamp(msg, sk, skb);
+	sock_recv_drops(msg, sk, skb);
+}
+EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops);
+
 static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size, int flags)
 {

^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4)
  2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
@ 2009-10-12  4:38   ` Eric Dumazet
  2009-10-12  5:48     ` Oliver Hartkopp
  2009-10-12 10:01     ` David Miller
  0 siblings, 2 replies; 15+ messages in thread
From: Eric Dumazet @ 2009-10-12  4:38 UTC (permalink / raw)
  To: Neil Horman; +Cc: netdev, davem, socketcan

Neil Horman a écrit :
> Version 4
> 
> Change Notes:
> 
> 1) Remove the superfluous put_cmsg that I missed in the last version
> 
> =======================================================================
> 
> Create a new socket level option to report number of queue overflows
> 
> Recently I augmented the AF_PACKET protocol to report the number of frames lost
> on the socket receive queue between any two enqueued frames.  This value was
> exported via a SOL_PACKET level cmsg.  After I completed that work it was
> requested that this feature be generalized so that any datagram oriented socket
> could make use of this option.  As such I've created this patch. It creates a
> new SOL_SOCKET level option called SO_RXQ_OVFL, which when enabled exports a
> SOL_SOCKET level cmsg that reports the number of times the sk_receive_queue
> overflowed between any two given frames.  It also augments the AF_PACKET
> protocol to take advantage of this new feature (as it previously did not touch
> sk->sk_drops, which this patch uses to record the overflow count).  Tested
> successfully by me.
> 
> Notes:
> 
> 1) Unlike my previous patch, this patch simply records the sk_drops value, which
> is not a number of drops between packets, but rather a total number of drops.
> Deltas must be computed in user space.
> 
> 2) While this patch currently works with datagram oriented protocols, it will
> also be accepted by non-datagram oriented protocols. I'm not sure if that's
> agreeable to everyone, but my argument in favor of doing so is that, for those
> protocols which aren't applicable to this option, sk_drops will always be zero,
> and reporting no drops on a receive queue that isn't used for those
> non-participating protocols seems reasonable to me.  This also saves us having
> to code in a per-protocol opt in mechanism.
> 
> 3) This applies cleanly to net-next assuming that commit
> 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted
> 
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>

Thanks Neil

I found no obvious error in this v4, except two long lines.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4)
  2009-10-12  4:38   ` Eric Dumazet
@ 2009-10-12  5:48     ` Oliver Hartkopp
  2009-10-12 10:01     ` David Miller
  1 sibling, 0 replies; 15+ messages in thread
From: Oliver Hartkopp @ 2009-10-12  5:48 UTC (permalink / raw)
  To: Eric Dumazet, Neil Horman; +Cc: netdev, davem

Eric Dumazet wrote:
> Neil Horman a écrit :
>>
>> (..)
>>
>> =======================================================================
>>
>> Create a new socket level option to report number of queue overflows
>>
>> (..)
>>
>> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Thanks Neil
> 
> I found no obvious error in this v4, except two long lines.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Thanks for this nice solution to both of you!

I'm very happy to be able to use this generalized socket option with can-raw
sockets now and that it has been possible to find an efficient and clear patch
for that.

Best regards,
Oliver

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4)
  2009-10-12  4:38   ` Eric Dumazet
  2009-10-12  5:48     ` Oliver Hartkopp
@ 2009-10-12 10:01     ` David Miller
  1 sibling, 0 replies; 15+ messages in thread
From: David Miller @ 2009-10-12 10:01 UTC (permalink / raw)
  To: eric.dumazet; +Cc: nhorman, netdev, socketcan

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 12 Oct 2009 06:38:23 +0200

> Neil Horman a écrit :
>> 3) This applies cleanly to net-next assuming that commit
>> 977750076d98c7ff6cbda51858bb5a5894a9d9ab (my af packet cmsg patch) is reverted
>> 
>> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
> 
> Thanks Neil
> 
> I found no obvious error in this v4, except two long lines.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

I reverted the AF_PACKET commit and applied this patch,
thanks!

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2009-10-12 10:01 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
2009-10-08  1:05 ` Eric Dumazet
2009-10-08 13:54   ` Neil Horman
2009-10-08 14:45     ` Eric Dumazet
2009-10-08 17:20       ` Neil Horman
2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
2009-10-09 21:31   ` Eric Dumazet
2009-10-09 23:21     ` Neil Horman
2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
2009-10-10  4:59   ` Eric Dumazet
2009-10-10  5:12   ` Eric Dumazet
2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
2009-10-12  4:38   ` Eric Dumazet
2009-10-12  5:48     ` Oliver Hartkopp
2009-10-12 10:01     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).