netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* kernel 2.6.39 eats multicast packets
@ 2011-06-17  8:32 Knut Tidemann
  2011-06-18 10:25 ` Eric Dumazet
  0 siblings, 1 reply; 3+ messages in thread
From: Knut Tidemann @ 2011-06-17  8:32 UTC (permalink / raw)
  To: netdev; +Cc: davem

Hello.

We're seeing an issue where a listening UDP socket in a multicast group
doesn't receive some multicast packets.
From simple testing it seems that the first packet from a new host is not
passed through the kernel and down to the socket, but the following packets
are. The packets can be seen with a tool such as tcpdump, but they never
reach the user space socket. It is worth noting that the packet loss does
not occur when sending to a multicast address from the same host that is
receiving. The address and port we have been using in our tests are
224.0.1.75:5060. I've also attached the testing code at the end of this
email. The issue was also present in 3.0-rc1.

This issue is not present in 2.6.38 and I've bisected the issue to the  
following commit:

----
b23dd4fe42b455af5c6e20966b7d6959fa8352ea is the first bad commit
commit b23dd4fe42b455af5c6e20966b7d6959fa8352ea
Author: David S. Miller <davem@davemloft.net>
Date:   Wed Mar 2 14:31:35 2011 -0800

     ipv4: Make output route lookup return rtable directly.

     Instead of on the stack.

     Signed-off-by: David S. Miller <davem@davemloft.net>
----


PS:
The history around this commit seems somewhat confusing, so the bisect
led us to a 2.6.38-rc5 commit, even though 2.6.38 is clean and without
issues. I've tried to revert the commit in 2.6.39, but the conflicts were
too many to test if the issue went away or not. Not being a kernel hacker
myself, I did not make any good attempts to resolve these conflicts.


----
Test code:
Sender application:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Create the sender's UDP socket and bind it to the wildcard address.
 *
 * Port 0 is requested, so the kernel assigns an ephemeral source port.
 *
 * Returns the socket descriptor on success. On failure the error is
 * reported with perror(), any descriptor opened here is closed, and -1
 * is returned.
 */
int setup_socket(void)
{
     int fd;
     struct sockaddr_in addr;

     if((fd = socket(PF_INET, SOCK_DGRAM, 0)) == -1) {
         perror("Could not creat socket");
         return -1;
     }

     /* Zero the whole structure, including the sin_zero padding. */
     memset(&addr, 0, sizeof(addr));
     addr.sin_family = AF_INET;
     addr.sin_port = htons(0);
     addr.sin_addr.s_addr = htonl(INADDR_ANY);
     if(bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
         /* Report first: close() below may overwrite errno. */
         perror("Could not bind socket to 0.0.0.0 (ephemeral port)");
         close(fd);
         return -1;
     }

     return fd;
}

/*
 * Simple multicast sender.
 *
 * Sends a numbered "Packet N" datagram to 224.0.1.75:5060 every two
 * seconds until sendto() fails or reports that nothing was sent.
 *
 * Returns -1 if the socket could not be set up or sending failed,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
     int fd, len, packet_nr;
     ssize_t res;
     char buf[256];
     struct sockaddr_in addr;

     /* Command-line arguments are not used. */
     (void)argc;
     (void)argv;

     res = 1;
     packet_nr = 0;

     if((fd = setup_socket()) == -1) {
         fprintf(stderr, "Could not setup socket. Aborting.\n");
         return -1;
     }

     fprintf(stdout, "Simple sender ready to send to 224.0.1.75:5060\n");

     memset(&addr, 0, sizeof(struct sockaddr_in));
     addr.sin_family = AF_INET;
     addr.sin_port = htons(5060);
     addr.sin_addr.s_addr = inet_addr("224.0.1.75");

     while(res > 0) {
         /* Bounded formatting; "Packet <int>" always fits in buf. */
         len = snprintf(buf, sizeof(buf), "Packet %d", packet_nr);
         res = sendto(fd, buf, (size_t)len, 0,
                      (const struct sockaddr *)&addr, sizeof(addr));
         if(res <= 0) {
             /* Leave immediately so errno is still intact for perror(). */
             break;
         }
         fprintf(stdout, "Sent packet nr %d\n", packet_nr);
         ++packet_nr;
         sleep(2);
     }

     if(res < 0) {
         perror("Error during sending");
     }

     close(fd);

     return res < 0 ? -1 : 0;
}

------
Receiver application

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

/*
 * Create the receiver's UDP socket, bind it to 0.0.0.0:5060 and join the
 * multicast group 224.0.1.75.
 *
 * The membership request leaves imr_interface as INADDR_ANY rather than
 * naming a specific local interface.
 *
 * Returns the socket descriptor on success. On failure the error is
 * reported with perror(), any descriptor opened here is closed, and -1
 * is returned.
 */
int setup_socket(void)
{
     int fd;

     struct ip_mreq mreq;
     struct sockaddr_in addr;

     if((fd = socket(PF_INET, SOCK_DGRAM, 0)) == -1) {
         perror("Could not creat socket");
         return -1;
     }

     /* Zero the whole structure, including the sin_zero padding. */
     memset(&addr, 0, sizeof(addr));
     addr.sin_family = AF_INET;
     addr.sin_port = htons(5060);
     addr.sin_addr.s_addr = htonl(INADDR_ANY);
     if(bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
         /* Report first: close() below may overwrite errno. */
         perror("Could not bind socket to 0.0.0.0:5060");
         close(fd);
         return -1;
     }

     memset(&mreq, 0, sizeof(mreq));
     mreq.imr_multiaddr.s_addr = inet_addr("224.0.1.75");
     mreq.imr_interface.s_addr = htonl(INADDR_ANY);

     if(setsockopt(fd, IPPROTO_IP, IP_ADD_MEMBERSHIP, &mreq,
                   sizeof(mreq)) == -1) {
         perror("Could not join multicast group");
         close(fd);
         return -1;
     }

     return fd;
}

/*
 * Simple multicast receiver.
 *
 * Prints the source address, source port and payload of every datagram
 * received on the socket returned by setup_socket(). The loop ends when
 * recvfrom() fails or returns 0 (for example on an empty datagram).
 *
 * Returns -1 if the socket could not be set up or receiving failed,
 * 0 otherwise.
 */
int main(int argc, char *argv[])
{
     int fd;
     ssize_t res;
     char buf[8192], *from;
     struct sockaddr_in addr;
     socklen_t addr_len;

     /* Command-line arguments are not used. */
     (void)argc;
     (void)argv;

     if((fd = setup_socket()) == -1) {
         fprintf(stderr, "Could not setup socket. Aborting.\n");
         return -1;
     }

     fprintf(stdout, "Simple receiver ready on 0.0.0.0:5060\n");

     for(;;) {
         /* addr_len is value-result: reset it before every call. */
         addr_len = sizeof(addr);
         /* Keep one byte free so the payload can be NUL-terminated. */
         res = recvfrom(fd, buf, sizeof(buf) - 1, 0,
                        (struct sockaddr *)&addr, &addr_len);
         if(res <= 0) {
             break;
         }

         buf[res] = '\0';
         from = inet_ntoa(addr.sin_addr);
         fprintf(stdout, "Got packet from %s:%d\n", from,
                 ntohs(addr.sin_port));
         fprintf(stdout, "%s\n", buf);
     }

     if(res < 0) {
         perror("Error during receive");
     }

     close(fd);

     return res < 0 ? -1 : 0;
}

------

With regards
Knut Andre Tidemann

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: kernel 2.6.39 eats multicast packets
  2011-06-17  8:32 kernel 2.6.39 eats multicast packets Knut Tidemann
@ 2011-06-18 10:25 ` Eric Dumazet
  2011-06-18 18:59   ` David Miller
  0 siblings, 1 reply; 3+ messages in thread
From: Eric Dumazet @ 2011-06-18 10:25 UTC (permalink / raw)
  To: Knut Tidemann; +Cc: netdev, davem

Le vendredi 17 juin 2011 à 10:32 +0200, Knut Tidemann a écrit :
> Hello.
> 
> We're seeing an issue where a listening UDP socket in a multicast group  
> doesn't receive some multicast packets.
>  From simple testing it seems that the first packet from a new host is not  
> passed through the kernel and down to the socket, but the next packets  
> are. The packets can be
> seen with a tool such as tcpdump, but they never reach the user space  
> socket. It is worth noting, that the packet loss does not occur when  
> sending to and from the same host,
> to a multicast address. The address and port we have been using in our  
> tests are 224.0.1.75:5060. I've also attached the testing code at the end  
> of this email. The issue was also present in 3.0-rc1.
> 
> This issue is not present in 2.6.38 and I've bisected the issue to the  
> following commit:
> 
> ----
> b23dd4fe42b455af5c6e20966b7d6959fa8352ea is the first bad commit
> commit b23dd4fe42b455af5c6e20966b7d6959fa8352ea
> Author: David S. Miller <davem@davemloft.net>
> Date:   Wed Mar 2 14:31:35 2011 -0800
> 
>      ipv4: Make output route lookup return rtable directly.
> 
>      Instead of on the stack.
> 
>      Signed-off-by: David S. Miller <davem@davemloft.net>
> ----
> 

Knut, this is awesome, your bug report is perfect and was really helpful
to let me fix the bug in maybe 15 minutes, including reboot and tests ;)

Many thanks !

[PATCH] ipv4: fix multicast losses

Knut Tidemann found that first packet of a multicast flow was not
correctly received, and bisected the regression to commit b23dd4fe42b4
(Make output route lookup return rtable directly.)

Special thanks to Knut, who provided a very nice bug report, including
sample programs to demonstrate the bug.

Reported-and-bisected-by: Knut Tidemann <knut.andre.tidemann@jotron.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 net/ipv4/route.c |    4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 045f0ec..aa13ef1 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1902,9 +1902,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 
 	hash = rt_hash(daddr, saddr, dev->ifindex, rt_genid(dev_net(dev)));
 	rth = rt_intern_hash(hash, rth, skb, dev->ifindex);
-	err = 0;
-	if (IS_ERR(rth))
-		err = PTR_ERR(rth);
+	return IS_ERR(rth) ? PTR_ERR(rth) : 0;
 
 e_nobufs:
 	return -ENOBUFS;



^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: kernel 2.6.39 eats multicast packets
  2011-06-18 10:25 ` Eric Dumazet
@ 2011-06-18 18:59   ` David Miller
  0 siblings, 0 replies; 3+ messages in thread
From: David Miller @ 2011-06-18 18:59 UTC (permalink / raw)
  To: eric.dumazet; +Cc: knut.andre.tidemann, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 18 Jun 2011 12:25:24 +0200

> [PATCH] ipv4: fix multicast losses
> 
> Knut Tidemann found that first packet of a multicast flow was not
> correctly received, and bisected the regression to commit b23dd4fe42b4
> (Make output route lookup return rtable directly.)
> 
> Special thanks to Knut, who provided a very nice bug report, including
> sample programs to demonstrate the bug.
> 
> Reported-and-bisectedby: Knut Tidemann <knut.andre.tidemann@jotron.com>
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied, thanks everyone.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-06-18 19:00 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-06-17  8:32 kernel 2.6.39 eats multicast packets Knut Tidemann
2011-06-18 10:25 ` Eric Dumazet
2011-06-18 18:59   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).