netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [BUG] af_packet: loop at reception when using fanout
@ 2012-08-06 14:21 Eric Leblond
  2012-08-16 15:44 ` [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group Eric Leblond
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Leblond @ 2012-08-06 14:21 UTC (permalink / raw)
  To: netdev, David Miller

[-- Attachment #1: Type: text/plain, Size: 1640 bytes --]

Hello,

When using fanout mode on an AF_PACKET socket, packets sent via one of
the fanout sockets are sent back to one of the sockets in the fanout set.
I've read the code and the error seems to be in dev.c.
The code checks that skb->sk is not equal to ptype->af_packet_priv, but in
the fanout case we should check that skb->sk is not one of the sockets of
the fanout set.

Here's what a possible implementation of the fix could look like:

diff --git a/net/core/dev.c b/net/core/dev.c
index 1cb0d8a..63d144f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1654,7 +1654,12 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
                 */
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
-                    (struct sock *)ptype->af_packet_priv != skb->sk)) {
+                       (((struct sock *)ptype->af_packet_priv != skb->sk)
+                       &&
+                        /* MISSING CHECK: af_packet_priv  skb->sk one of the socket in fanout id of skb */
+                       ! packet_sk_in_fanout(ptype, skb->sk)
+                        )
+                       ) {
                        if (pt_prev) {
                                deliver_skb(skb2, pt_prev, skb->dev);
                                pt_prev = ptype;

If this is done, we will have to export one function from the af_packet
module, and I don't like the idea. If it is the correct way to do it, I can
implement this and propose a patch.

BR,
-- 
Eric Leblond 
Blog: http://home.regit.org/ - Portfolio: http://regit.500px.com/

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group
  2012-08-06 14:21 [BUG] af_packet: loop at reception when using fanout Eric Leblond
@ 2012-08-16 15:44 ` Eric Leblond
  2012-08-16 21:52   ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Leblond @ 2012-08-16 15:44 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric Leblond

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This results in a loop for software which
generates packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info into account.
---
 include/linux/if_packet.h |    2 ++
 include/linux/netdevice.h |    3 +++
 net/core/dev.c            |   23 +++++++++++++++++++++--
 net/packet/af_packet.c    |    7 +++++++
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index f379929..388519e 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -269,4 +269,6 @@ struct packet_mreq {
 #define PACKET_MR_ALLMULTI	2
 #define PACKET_MR_UNICAST	3
 
+void *pkt_sk_get_fanout(struct sock *sk);
+
 #endif
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d6ab69..7785730 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1508,6 +1508,8 @@ struct napi_gro_cb {
 
 #define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
 
+#define NETDEV_TYPE_AF_PACKET_FANOUT	1 << 0
+
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
@@ -1522,6 +1524,7 @@ struct packet_type {
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
 	void			*af_packet_priv;
+	unsigned char		flags;
 	struct list_head	list;
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ce1bccb..b9498ac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -89,6 +89,7 @@
 #include <linux/errno.h>
 #include <linux/interrupt.h>
 #include <linux/if_ether.h>
+#include <linux/if_packet.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -1651,6 +1652,25 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+	if (ptype->af_packet_priv == NULL)
+		return false;
+
+#ifdef CONFIG_PACKET
+	if (ptype->flags & NETDEV_TYPE_AF_PACKET_FANOUT) {
+		if (ptype->af_packet_priv == pkt_sk_get_fanout(skb->sk))
+			return true;
+	} else
+		if ((struct sock *)ptype->af_packet_priv == skb->sk)
+			return true;
+#else
+	if ((struct sock *)ptype->af_packet_priv == skb->sk)
+		return true;
+#endif
+	return false;
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1668,8 +1688,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
 		if ((ptype->dev == dev || !ptype->dev) &&
-		    (ptype->af_packet_priv == NULL ||
-		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+		    (!skb_loop_sk(ptype, skb))) {
 			if (pt_prev) {
 				deliver_skb(skb2, pt_prev, skb->dev);
 				pt_prev = ptype;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8a1605a..b57aeca 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -247,6 +247,12 @@ struct packet_skb_cb {
 static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
 static void __fanout_link(struct sock *sk, struct packet_sock *po);
 
+void *pkt_sk_get_fanout(struct sock *sk)
+{
+	return (void*)((struct packet_sock *)sk)->fanout;
+}
+EXPORT_SYMBOL(pkt_sk_get_fanout);
+
 /* register_prot_hook must be invoked with the po->bind_lock held,
  * or from a context in which asynchronous accesses to the packet
  * socket is not possible (packet_create()).
@@ -1230,6 +1236,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
+		match->prot_hook.flags |= NETDEV_TYPE_AF_PACKET_FANOUT;
 		dev_add_pack(&match->prot_hook);
 		list_add(&match->list, &fanout_list);
 	}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group
  2012-08-16 15:44 ` [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group Eric Leblond
@ 2012-08-16 21:52   ` David Miller
  2012-08-16 21:52     ` David Miller
  2012-08-17  8:03     ` [RFC PATCH v1.0] " Ying Xue
  0 siblings, 2 replies; 7+ messages in thread
From: David Miller @ 2012-08-16 21:52 UTC (permalink / raw)
  To: eric; +Cc: netdev

From: Eric Leblond <eric@regit.org>
Date: Thu, 16 Aug 2012 17:44:50 +0200

> If a packet is emitted on one socket in one group of fanout sockets,
> it is transmitted again. It is thus read again on one of the sockets
> of the fanout group. This result in a loop for software which
> generate packets when receiving one.
> This retransmission is not the intended behavior: a fanout group
> must behave like a single socket. The packet should not be
> transmitted on a socket if it originates from a socket belonging
> to the same fanout group.
> 
> This patch fixes the issue by changing the transmission check to
> take fanout group info account.

This looks mostly fine, thanks for fixing this.

I wonder if it wouldn't be better to simply have a callback?  That
would eliminate all of the ifdefs:

	if (ptype->id_match) {
		if (ptype->id_match(ptype, skb->sk))
			return true;
	} else if (ptype->af_packet_priv == skb->sk)
		return true;

It's a shame that we have a user of af_packet_priv outside of
AF_PACKET, in TIPC.  If we could get rid of that we could simplify
things even further.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group
  2012-08-16 21:52   ` David Miller
@ 2012-08-16 21:52     ` David Miller
  2012-08-17  8:02       ` [PATCH v1.1] " Eric Leblond
  2012-08-17  8:03     ` [RFC PATCH v1.0] " Ying Xue
  1 sibling, 1 reply; 7+ messages in thread
From: David Miller @ 2012-08-16 21:52 UTC (permalink / raw)
  To: eric; +Cc: netdev


BTW, please also give "Aleksandr Kotov <a1k@mail.ru>" credit for
reporting this problem recently with a Reported-by tag, thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v1.1] af_packet: don't emit packet on orig fanout group
  2012-08-16 21:52     ` David Miller
@ 2012-08-17  8:02       ` Eric Leblond
  2012-08-20  9:37         ` David Miller
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Leblond @ 2012-08-17  8:02 UTC (permalink / raw)
  To: davem; +Cc: netdev, Eric Leblond

If a packet is emitted on one socket in one group of fanout sockets,
it is transmitted again. It is thus read again on one of the sockets
of the fanout group. This results in a loop for software which
generates packets when receiving one.
This retransmission is not the intended behavior: a fanout group
must behave like a single socket. The packet should not be
transmitted on a socket if it originates from a socket belonging
to the same fanout group.

This patch fixes the issue by changing the transmission check to
take fanout group info into account.

Reported-by: Aleksandr Kotov <a1k@mail.ru>
Signed-off-by: Eric Leblond <eric@regit.org>
---
 include/linux/netdevice.h |    2 ++
 net/core/dev.c            |   16 ++++++++++++++--
 net/packet/af_packet.c    |    9 +++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1d6ab69..a6060b2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1521,6 +1521,8 @@ struct packet_type {
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
+	bool			(*id_match)(struct packet_type *ptype,
+					    struct sock *sk);
 	void			*af_packet_priv;
 	struct list_head	list;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index ce1bccb..05eafb2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1651,6 +1651,19 @@ static inline int deliver_skb(struct sk_buff *skb,
 	return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
 }
 
+static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb)
+{
+	if (ptype->af_packet_priv == NULL)
+		return false;
+
+	if (ptype->id_match)
+		return ptype->id_match(ptype, skb->sk);
+	else if ((struct sock *)ptype->af_packet_priv == skb->sk)
+		return true;
+
+	return false;
+}
+
 /*
  *	Support routine. Sends outgoing frames to any network
  *	taps currently in use.
@@ -1668,8 +1681,7 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 		 * they originated from - MvS (miquels@drinkel.ow.org)
 		 */
 		if ((ptype->dev == dev || !ptype->dev) &&
-		    (ptype->af_packet_priv == NULL ||
-		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
+		    (!skb_loop_sk(ptype, skb))) {
 			if (pt_prev) {
 				deliver_skb(skb2, pt_prev, skb->dev);
 				pt_prev = ptype;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8a1605a..7a34ace 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1178,6 +1178,14 @@ static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
 	spin_unlock(&f->lock);
 }
 
+bool match_fanout_group(struct packet_type *ptype, struct sock * sk)
+{
+	if (ptype->af_packet_priv == (void*)((struct packet_sock *)sk)->fanout)
+		return true;
+
+	return false;
+}
+
 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 {
 	struct packet_sock *po = pkt_sk(sk);
@@ -1230,6 +1238,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
 		match->prot_hook.af_packet_priv = match;
+		match->prot_hook.id_match = match_fanout_group;
 		dev_add_pack(&match->prot_hook);
 		list_add(&match->list, &fanout_list);
 	}
-- 
1.7.10.4

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group
  2012-08-16 21:52   ` David Miller
  2012-08-16 21:52     ` David Miller
@ 2012-08-17  8:03     ` Ying Xue
  1 sibling, 0 replies; 7+ messages in thread
From: Ying Xue @ 2012-08-17  8:03 UTC (permalink / raw)
  To: David Miller; +Cc: eric, netdev

David Miller wrote:
> From: Eric Leblond <eric@regit.org>
> Date: Thu, 16 Aug 2012 17:44:50 +0200
>
>   
>> If a packet is emitted on one socket in one group of fanout sockets,
>> it is transmitted again. It is thus read again on one of the sockets
>> of the fanout group. This result in a loop for software which
>> generate packets when receiving one.
>> This retransmission is not the intended behavior: a fanout group
>> must behave like a single socket. The packet should not be
>> transmitted on a socket if it originates from a socket belonging
>> to the same fanout group.
>>
>> This patch fixes the issue by changing the transmission check to
>> take fanout group info account.
>>     
>
> This looks mostly fine, thanks for fixing this.
>
> I wonder if it wouldn't be better to simply have a callback?  That
> would eliminate all of the ifdefs:
>
> 	if (ptype->id_match) {
> 		if (ptype->id_match(ptype, skb->sk))
> 			return true;
> 	} else if (ptype->af_packet_priv == skb->sk)
> 		return true;
>
> It's a shame that we have a user of af_packet_priv outside of
> AF_PACKET, in TIPC.  If we could get rid of that we could simplify
> things even futher.
>   

Hi David, next week I will take on the job of getting rid of the usage of
af_packet_priv in TIPC.

Regards,
Ying

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>
>   

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1.1] af_packet: don't emit packet on orig fanout group
  2012-08-17  8:02       ` [PATCH v1.1] " Eric Leblond
@ 2012-08-20  9:37         ` David Miller
  0 siblings, 0 replies; 7+ messages in thread
From: David Miller @ 2012-08-20  9:37 UTC (permalink / raw)
  To: eric; +Cc: netdev

From: Eric Leblond <eric@regit.org>
Date: Fri, 17 Aug 2012 10:02:58 +0200

> If a packet is emitted on one socket in one group of fanout sockets,
> it is transmitted again. It is thus read again on one of the sockets
> of the fanout group. This result in a loop for software which
> generate packets when receiving one.
> This retransmission is not the intended behavior: a fanout group
> must behave like a single socket. The packet should not be
> transmitted on a socket if it originates from a socket belonging
> to the same fanout group.
> 
> This patch fixes the issue by changing the transmission check to
> take fanout group info account.
> 
> Reported-by: Aleksandr Kotov <a1k@mail.ru>
> Signed-off-by: Eric Leblond <eric@regit.org>

Applied, thanks a lot Eric.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-08-20  9:37 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-08-06 14:21 [BUG] af_packet: loop at reception when using fanout Eric Leblond
2012-08-16 15:44 ` [RFC PATCH v1.0] af_packet: don't emit packet on orig fanout group Eric Leblond
2012-08-16 21:52   ` David Miller
2012-08-16 21:52     ` David Miller
2012-08-17  8:02       ` [PATCH v1.1] " Eric Leblond
2012-08-20  9:37         ` David Miller
2012-08-17  8:03     ` [RFC PATCH v1.0] " Ying Xue

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).