All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Neil Horman <nhorman@tuxdriver.com>
Cc: netdev@vger.kernel.org, davem@davemloft.net, socketcan@hartkopp.net
Subject: Re: [PATCH] Generalize socket rx gap / receive queue overflow cmsg
Date: Thu, 08 Oct 2009 03:05:12 +0200	[thread overview]
Message-ID: <4ACD3AC8.608@gmail.com> (raw)
In-Reply-To: <20091007180835.GB20524@hmsreliant.think-freely.org>

Neil Horman a écrit :
> diff --git a/net/core/sock.c b/net/core/sock.c
> index 7626b6a..8bd366f 100644
> --- a/net/core/sock.c
> +++ b/net/core/sock.c
> @@ -306,6 +306,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
>  	skb_len = skb->len;
>  


>  	skb_queue_tail(&sk->sk_receive_queue, skb);
> +	skb->dropcount = atomic_read(&sk->sk_drops);

No, skb was given to skb_queue_tail(); you are not allowed to touch it now,
because another CPU might already have consumed it.

You'd better do:

struct sk_buff_head *list = &sk->sk_receive_queue;

spin_lock_irqsave(&list->lock, flags);
skb->dropcount = atomic_read(&sk->sk_drops); // should be done under lock protection
__skb_queue_tail(list, skb);
spin_unlock_irqrestore(&list->lock, flags);



>  
>  	if (!sock_flag(sk, SOCK_DEAD))
>  		sk->sk_data_ready(sk, skb_len);
> @@ -702,6 +703,12 @@ set_rcvbuf:
>  
>  		/* We implement the SO_SNDLOWAT etc to
>  		   not be settable (1003.1g 5.3) */
> +	case SO_RXQ_OVFL:
> +		if (valbool)
> +			set_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		else
> +			clear_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
>  	default:
>  		ret = -ENOPROTOOPT;
>  		break;
> @@ -901,6 +908,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
>  		v.val = sk->sk_mark;
>  		break;
>  
> +	case SO_RXQ_OVFL:
> +		v.val = test_bit(SOCK_RXQ_OVFL, &sock->flags);
> +		break;
> +
>  	default:
>  		return -ENOPROTOOPT;
>  	}
> diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
> index d7ecca0..920ae1e 100644
> --- a/net/packet/af_packet.c
> +++ b/net/packet/af_packet.c
> @@ -617,6 +617,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  	if (pskb_trim(skb, snaplen))
>  		goto drop_n_acct;
>  

> +	skb->dropcount = atomic_read(&sk->sk_drops);
This should be done a little bit later, right before "__skb_queue_tail(&sk->sk_receive_queue, skb);"

>  	skb_set_owner_r(skb, sk);
>  	skb->dev = NULL;
>  	skb_dst_drop(skb);
> @@ -634,6 +635,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
>  drop_n_acct:


>  	spin_lock(&sk->sk_receive_queue.lock);
>  	po->stats.tp_drops++;
> +	atomic_inc(&sk->sk_drops);
>  	spin_unlock(&sk->sk_receive_queue.lock);

You could replace this block of four lines with: po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);

>  
>  drop_n_restore:
> diff --git a/net/socket.c b/net/socket.c
> index 7565536..ad157a3 100644
> --- a/net/socket.c
> +++ b/net/socket.c
> @@ -673,6 +673,12 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  {
>  	int err;
>  	struct sock_iocb *si = kiocb_to_siocb(iocb);
> +	struct sk_buff *skb;
> +	int rc;
> +	struct sock *sk = sock->sk;
> +	unsigned long cpu_flags;
> +	__u32 gap = 0;

> +	int check_drops = test_bit(SOCK_RXQ_OVFL, &sock->flags);
>  
>  	si->sock = sock;
>  	si->scm = NULL;
> @@ -684,7 +690,21 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock,
>  	if (err)
>  		return err;
>  
> -	return sock->ops->recvmsg(iocb, sock, msg, size, flags);




> +	if (check_drops) {
> +		skb = skb_recv_datagram(sk, flags|MSG_PEEK,
> +				flags & MSG_DONTWAIT, &err);

	Ouch, this is too expensive, please find another way :)

> +		if (skb) {
> +			gap = skb->dropcount;
> +			consume_skb(skb);
> +		}
> +	}
> +
> +	rc = sock->ops->recvmsg(iocb, sock, msg, size, flags);
> +
> +	if (check_drops && (rc > 0))

		&& gap != 0

> +		put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &gap);
> +


  reply	other threads:[~2009-10-08  1:06 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-07 18:08 [PATCH] Generalize socket rx gap / receive queue overflow cmsg Neil Horman
2009-10-08  1:05 ` Eric Dumazet [this message]
2009-10-08 13:54   ` Neil Horman
2009-10-08 14:45     ` Eric Dumazet
2009-10-08 17:20       ` Neil Horman
2009-10-09 19:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v2) Neil Horman
2009-10-09 21:31   ` Eric Dumazet
2009-10-09 23:21     ` Neil Horman
2009-10-09 23:56 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v3) Neil Horman
2009-10-10  4:59   ` Eric Dumazet
2009-10-10  5:12   ` Eric Dumazet
2009-10-10 12:35 ` [PATCH] Generalize socket rx gap / receive queue overflow cmsg (v4) Neil Horman
2009-10-12  4:38   ` Eric Dumazet
2009-10-12  5:48     ` Oliver Hartkopp
2009-10-12 10:01     ` David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4ACD3AC8.608@gmail.com \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=nhorman@tuxdriver.com \
    --cc=socketcan@hartkopp.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.