netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PACKET]: Add PACKET_AUXDATA cmsg
@ 2007-01-10  2:54 Herbert Xu
  2007-01-10  9:27 ` David Miller
  2007-01-23 23:38 ` Herbert Xu
  0 siblings, 2 replies; 4+ messages in thread
From: Herbert Xu @ 2007-01-10  2:54 UTC (permalink / raw)
  To: David S. Miller, netdev

Hi Dave:

[PACKET]: Add optional checksum computation for recvmsg

This patch is needed for ISC's DHCP server (and probably other
DHCP servers/clients using AF_PACKET) to be able to serve another
client on the same Xen host.

The problem is that packets between different domains on the same
Xen host only have partial checksums.  Unfortunately this piece of
information is not passed along in AF_PACKET unless you're using
the mmap interface.  Since dhcpd doesn't support packet-mmap, UDP
packets from the same host come out with apparently bogus checksums.

This patch adds a mechanism for AF_PACKET recvmsg(2) to return the
status along with the packet.  It does so by adding a new cmsg that
contains this information along with some other relevant data such
as the original packet length.

I didn't include the time stamp information since there is already
a cmsg for that.

This patch also changes the mmap code to set the CSUMNOTREADY flag
on all packets instead of just outgoing packets on cooked sockets.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index 99393ef..f3de05c 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -41,6 +41,7 @@ struct sockaddr_ll
 #define PACKET_RX_RING			5
 #define PACKET_STATISTICS		6
 #define PACKET_COPY_THRESH		7
+#define PACKET_AUXDATA			8
 
 struct tpacket_stats
 {
@@ -48,6 +49,15 @@ struct tpacket_stats
 	unsigned int	tp_drops;
 };
 
+struct tpacket_auxdata
+{
+	__u32		tp_status;
+	__u32		tp_len;
+	__u32		tp_snaplen;
+	__u16		tp_mac;
+	__u16		tp_net;
+};
+
 struct tpacket_hdr
 {
 	unsigned long	tp_status;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index da73e8a..dab117e 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -200,7 +200,8 @@ struct packet_sock {
 #endif
 	struct packet_type	prot_hook;
 	spinlock_t		bind_lock;
-	char			running;	/* prot_hook is attached*/
+	unsigned int		running:1,	/* prot_hook is attached*/
+				auxdata:1;
 	int			ifindex;	/* bound device		*/
 	__be16			num;
 #ifdef CONFIG_PACKET_MULTICAST
@@ -214,6 +215,8 @@ struct packet_sock {
 #endif
 };
 
+#define PACKET_SKB_CB(__skb)	((struct tpacket_auxdata *)((__skb)->cb))
+
 #ifdef CONFIG_PACKET_MMAP
 
 static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
@@ -468,6 +471,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned snaplen;
+	struct tpacket_auxdata *aux;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -526,6 +530,15 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
 
+	aux = PACKET_SKB_CB(skb);
+	aux->tp_status = TP_STATUS_USER;
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		aux->tp_status |= TP_STATUS_CSUMNOTREADY;
+	aux->tp_len = skb->len;
+	aux->tp_snaplen = snaplen;
+	aux->tp_mac = 0;
+	aux->tp_net = skb->nh.raw - skb->data;
+
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
 
@@ -585,11 +598,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
 		else if (skb->pkt_type == PACKET_OUTGOING) {
 			/* Special case: outgoing packets have ll header at head */
 			skb_pull(skb, skb->nh.raw - skb->data);
-			if (skb->ip_summed == CHECKSUM_PARTIAL)
-				status |= TP_STATUS_CSUMNOTREADY;
 		}
 	}
 
+	if (skb->ip_summed == CHECKSUM_PARTIAL)
+		status |= TP_STATUS_CSUMNOTREADY;
+
 	snaplen = skb->len;
 
 	if (run_filter(skb, sk, &snaplen) < 0)
@@ -1119,6 +1133,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	if (msg->msg_name)
 		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
 
+	if (pkt_sk(sk)->auxdata) {
+		struct tpacket_auxdata *aux = PACKET_SKB_CB(skb);
+		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(*aux), aux);
+	}
+
 	/*
 	 *	Free or return the buffer as appropriate. Again this
 	 *	hides all the races and re-entrancy issues from us.
@@ -1317,6 +1336,7 @@ static int
 packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
 {
 	struct sock *sk = sock->sk;
+	struct packet_sock *po = pkt_sk(sk);
 	int ret;
 
 	if (level != SOL_PACKET)
@@ -1369,6 +1389,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 		return 0;
 	}
 #endif
+	case PACKET_AUXDATA:
+	{
+		int val;
+
+		if (optlen < sizeof(val))
+			return -EINVAL;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+
+		po->auxdata = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -1378,8 +1410,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			     char __user *optval, int __user *optlen)
 {
 	int len;
+	int val;
 	struct sock *sk = sock->sk;
 	struct packet_sock *po = pkt_sk(sk);
+	void *data;
+	struct tpacket_stats st;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -1392,9 +1427,6 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		
 	switch(optname)	{
 	case PACKET_STATISTICS:
-	{
-		struct tpacket_stats st;
-
 		if (len > sizeof(struct tpacket_stats))
 			len = sizeof(struct tpacket_stats);
 		spin_lock_bh(&sk->sk_receive_queue.lock);
@@ -1403,16 +1435,23 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 		spin_unlock_bh(&sk->sk_receive_queue.lock);
 		st.tp_packets += st.tp_drops;
 
-		if (copy_to_user(optval, &st, len))
-			return -EFAULT;
+		data = &st;
+		break;
+	case PACKET_AUXDATA:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = po->auxdata;
+
+		data = &val;
 		break;
-	}
 	default:
 		return -ENOPROTOOPT;
 	}
 
 	if (put_user(len, optlen))
 		return -EFAULT;
+	if (copy_to_user(optval, data, len))
+		return -EFAULT;
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PACKET]: Add PACKET_AUXDATA cmsg
  2007-01-10  2:54 [PACKET]: Add PACKET_AUXDATA cmsg Herbert Xu
@ 2007-01-10  9:27 ` David Miller
  2007-01-23 23:38 ` Herbert Xu
  1 sibling, 0 replies; 4+ messages in thread
From: David Miller @ 2007-01-10  9:27 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 10 Jan 2007 13:54:35 +1100

> [PACKET]: Add optional checksum computation for recvmsg

This looks good, thanks Herbert.

I'll queue it up for 2.6.21

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PACKET]: Add PACKET_AUXDATA cmsg
  2007-01-10  2:54 [PACKET]: Add PACKET_AUXDATA cmsg Herbert Xu
  2007-01-10  9:27 ` David Miller
@ 2007-01-23 23:38 ` Herbert Xu
  2007-02-05  7:33   ` David Miller
  1 sibling, 1 reply; 4+ messages in thread
From: Herbert Xu @ 2007-01-23 23:38 UTC (permalink / raw)
  To: David S. Miller, netdev

On Wed, Jan 10, 2007 at 01:54:35PM +1100, Herbert Xu wrote:
> 
> [PACKET]: Add optional checksum computation for recvmsg

Unfortunately I missed the fact that skb->cb is already used to store
the sockaddr.  This patch fixes it up.

[PACKET]: Fix skb->cb clobbering between aux and sockaddr

Both aux data and sockaddr try to use the same buffer, which
obviously doesn't work.  We just happen to have 4 bytes free in
the skb->cb if you take away the maximum length of sockaddr_ll.
That's just enough to store the one piece of info from aux data
that we can't generate at recvmsg(2) time.

This is what the following patch does.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dab117e..4c7f9d7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -60,6 +60,7 @@
 #include <linux/netdevice.h>
 #include <linux/if_packet.h>
 #include <linux/wireless.h>
+#include <linux/kernel.h>
 #include <linux/kmod.h>
 #include <net/ip.h>
 #include <net/protocol.h>
@@ -215,7 +216,15 @@ struct packet_sock {
 #endif
 };
 
-#define PACKET_SKB_CB(__skb)	((struct tpacket_auxdata *)((__skb)->cb))
+struct packet_skb_cb {
+	unsigned int origlen;
+	union {
+		struct sockaddr_pkt pkt;
+		struct sockaddr_ll ll;
+	} sa;
+};
+
+#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
 
 #ifdef CONFIG_PACKET_MMAP
 
@@ -296,7 +305,7 @@ static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct
 	/* drop conntrack reference */
 	nf_reset(skb);
 
-	spkt = (struct sockaddr_pkt*)skb->cb;
+	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
 
 	skb_push(skb, skb->data-skb->mac.raw);
 
@@ -471,7 +480,6 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	u8 * skb_head = skb->data;
 	int skb_len = skb->len;
 	unsigned snaplen;
-	struct tpacket_auxdata *aux;
 
 	if (skb->pkt_type == PACKET_LOOPBACK)
 		goto drop;
@@ -519,7 +527,10 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 		skb = nskb;
 	}
 
-	sll = (struct sockaddr_ll*)skb->cb;
+	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
+		     sizeof(skb->cb));
+
+	sll = &PACKET_SKB_CB(skb)->sa.ll;
 	sll->sll_family = AF_PACKET;
 	sll->sll_hatype = dev->type;
 	sll->sll_protocol = skb->protocol;
@@ -530,14 +541,7 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet
 	if (dev->hard_header_parse)
 		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
 
-	aux = PACKET_SKB_CB(skb);
-	aux->tp_status = TP_STATUS_USER;
-	if (skb->ip_summed == CHECKSUM_PARTIAL)
-		aux->tp_status |= TP_STATUS_CSUMNOTREADY;
-	aux->tp_len = skb->len;
-	aux->tp_snaplen = snaplen;
-	aux->tp_mac = 0;
-	aux->tp_net = skb->nh.raw - skb->data;
+	PACKET_SKB_CB(skb)->origlen = skb->len;
 
 	if (pskb_trim(skb, snaplen))
 		goto drop_n_acct;
@@ -1106,7 +1110,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	 *	it in now.
 	 */
 
-	sll = (struct sockaddr_ll*)skb->cb;
+	sll = &PACKET_SKB_CB(skb)->sa.ll;
 	if (sock->type == SOCK_PACKET)
 		msg->msg_namelen = sizeof(struct sockaddr_pkt);
 	else
@@ -1131,11 +1135,21 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	sock_recv_timestamp(msg, sk, skb);
 
 	if (msg->msg_name)
-		memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
+		       msg->msg_namelen);
 
 	if (pkt_sk(sk)->auxdata) {
-		struct tpacket_auxdata *aux = PACKET_SKB_CB(skb);
-		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(*aux), aux);
+		struct tpacket_auxdata aux;
+
+		aux.tp_status = TP_STATUS_USER;
+		if (skb->ip_summed == CHECKSUM_PARTIAL)
+			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+		aux.tp_snaplen = skb->len;
+		aux.tp_mac = 0;
+		aux.tp_net = skb->nh.raw - skb->data;
+
+		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
 	}
 
 	/*

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PACKET]: Add PACKET_AUXDATA cmsg
  2007-01-23 23:38 ` Herbert Xu
@ 2007-02-05  7:33   ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2007-02-05  7:33 UTC (permalink / raw)
  To: herbert; +Cc: netdev

From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Wed, 24 Jan 2007 10:38:34 +1100

> On Wed, Jan 10, 2007 at 01:54:35PM +1100, Herbert Xu wrote:
> > 
> > [PACKET]: Add optional checksum computation for recvmsg
> 
> Unfortunately I missed the fact the skb->cb is already used to store
> the sockaddr.  This patch fixes it up.

Both patches applied, thanks Herbert.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2007-02-05  7:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-01-10  2:54 [PACKET]: Add PACKET_AUXDATA cmsg Herbert Xu
2007-01-10  9:27 ` David Miller
2007-01-23 23:38 ` Herbert Xu
2007-02-05  7:33   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).