netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Lucian Adrian Grijincu <lgrijincu@ixiacom.com>
Cc: David Miller <davem@davemloft.net>,
	netdev@vger.kernel.org, opurdila@ixiacom.com
Subject: Re: [PATCH 1/2] udp: cleanup __udp4_lib_mcast_deliver
Date: Fri, 06 Nov 2009 16:10:22 +0100	[thread overview]
Message-ID: <4AF43C5E.4060300@gmail.com> (raw)
In-Reply-To: <200911061604.21465.lgrijincu@ixiacom.com>

Lucian Adrian Grijincu a écrit :
> 
> As far as I understand it, the spin locks protect the hslot, and freeing the 
> skb does not walk/change or interact with the hslot in any way.
> 

Yes, but this single skb free is on the multicast very slow path
(it only happens if we receive a multicast packet with no listener, which
 should not happen on a multicast-aware network...)


If you really want to optimize this part, we could use an array of
32 (or 64) socket pointers, to be able to perform the really expensive
work (skb_clone(), udp_queue_rcv_skb()) outside of the lock.

Something like this untested patch:


 net/ipv4/udp.c |   68 ++++++++++++++++++++++++++++++-----------------
 1 files changed, 44 insertions(+), 24 deletions(-)

diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index d5e75e9..5d71aee 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1190,6 +1190,24 @@ drop:
 	return -1;
 }
 
+
+static void flush_stack(struct sock **stack, unsigned int count,
+			struct sk_buff *skb, unsigned int final)
+{
+	unsigned int i;
+	struct sk_buff *skb1 = NULL;
+
+	for (i = 0; i < count; i++) {
+		if (likely(skb1 == NULL))
+			skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC);
+
+		if (likely(skb1 && udp_queue_rcv_skb(stack[i], skb1) <= 0))
+			skb1 = NULL;
+	}
+	if (unlikely(skb1))
+		consume_skb(skb1);
+}
+
 /*
  *	Multicasts and broadcasts go to each listener.
  *
@@ -1201,38 +1219,40 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb,
 				    __be32 saddr, __be32 daddr,
 				    struct udp_table *udptable)
 {
-	struct sock *sk;
+	struct sock *sk, *stack[256 / sizeof(void *)];
 	struct udp_hslot *hslot = udp_hashslot(udptable, net, ntohs(uh->dest));
 	int dif;
+	unsigned int i, count = 0;
 
 	spin_lock(&hslot->lock);
 	sk = sk_nulls_head(&hslot->head);
 	dif = skb->dev->ifindex;
 	sk = udp_v4_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);
-	if (sk) {
-		struct sock *sknext = NULL;
-
-		do {
-			struct sk_buff *skb1 = skb;
-
-			sknext = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
-						   daddr, uh->source, saddr,
-						   dif);
-			if (sknext)
-				skb1 = skb_clone(skb, GFP_ATOMIC);
-
-			if (skb1) {
-				int ret = udp_queue_rcv_skb(sk, skb1);
-				if (ret > 0)
-					/* we should probably re-process instead
-					 * of dropping packets here. */
-					kfree_skb(skb1);
-			}
-			sk = sknext;
-		} while (sknext);
-	} else
-		consume_skb(skb);
+	while (sk) {
+		stack[count++] = sk;
+		if (unlikely(count == ARRAY_SIZE(stack))) {
+			flush_stack(stack, count, skb, ~0);
+			count = 0;
+		}
+
+		sk = udp_v4_mcast_next(net, sk_nulls_next(sk), uh->dest,
+				       daddr, uh->source, saddr, dif);
+	}
+	/*
+	 * before releasing the lock, we must take reference on sockets
+	 */
+	for (i = 0; i < count; i++)
+		sock_hold(stack[i]);
+
 	spin_unlock(&hslot->lock);
+
+	/*
+	 * do the slow work with no lock held
+	 */
+	flush_stack(stack, count, skb, count - 1);
+
+	for (i = 0; i < count; i++)
+		sock_put(stack[i]);
 	return 0;
 }
 

  reply	other threads:[~2009-11-06 15:10 UTC|newest]

Thread overview: 15+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-05 18:33 [PATCH 1/2] udp: cleanup __udp4_lib_mcast_deliver Lucian Adrian Grijincu
2009-11-06  8:42 ` David Miller
2009-11-06 14:04   ` Lucian Adrian Grijincu
2009-11-06 15:10     ` Eric Dumazet [this message]
2009-11-06 15:59       ` [PATCH net-next-2.6] udp: Optimise multicast reception Eric Dumazet
2009-11-06 16:30         ` [PATCH net-next-2.6] ipv6: " Eric Dumazet
2009-11-06 16:35           ` Eric Dumazet
2009-11-06 17:06             ` [PATCH net-next-2.6 take2] " Eric Dumazet
2009-11-06 17:19               ` Lucian Adrian Grijincu
2009-11-06 17:24                 ` Eric Dumazet
2009-11-06 17:54                   ` Eric Dumazet
2009-11-06 17:59                     ` Lucian Adrian Grijincu
2009-11-06 18:03                       ` Eric Dumazet
2009-11-06 16:35         ` [PATCH net-next-2.6] " Lucian Adrian Grijincu
2009-11-06 16:54           ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4AF43C5E.4060300@gmail.com \
    --to=eric.dumazet@gmail.com \
    --cc=davem@davemloft.net \
    --cc=lgrijincu@ixiacom.com \
    --cc=netdev@vger.kernel.org \
    --cc=opurdila@ixiacom.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).