netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset
@ 2012-10-22  6:56 Paul Chavent
  2012-10-31 17:14 ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Paul Chavent @ 2012-10-22  6:56 UTC (permalink / raw)
  To: davem, netdev; +Cc: Paul Chavent

The tx data offset of packet mmap tx ring used to be :
(TPACKET2_HDRLEN - sizeof(struct sockaddr_ll))

The problem is that, with SOCK_RAW socket, the payload (14 bytes after
the beginning of the user data) is misaligned.

This patch allows to let the user gives an offset for it's tx data if
he desires.

Set sock option PACKET_TX_HAS_OFF to 1, then specify in each frame of
your tx ring tp_net for SOCK_DGRAM, or tp_mac for SOCK_RAW.

Signed-off-by: Paul Chavent <paul.chavent@onera.fr>
---
 Documentation/networking/packet_mmap.txt | 13 +++++++++
 include/uapi/linux/if_packet.h           |  1 +
 net/packet/af_packet.c                   | 49 +++++++++++++++++++++++++++++++-
 net/packet/internal.h                    |  1 +
 4 files changed, 63 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 1c08a4b..7cd879e 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -163,6 +163,19 @@ As capture, each frame contains two parts:
 
  A complete tutorial is available at: http://wiki.gnu-log.net/
 
+By default, the user should put data at :
+ frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
+
+So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW),
+the beginning of the user data will be at :
+ frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
+
+If you wish to put user data at a custom offset from the beginning of
+the frame (for payload alignment with SOCK_RAW mode for instance) you
+can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order
+to make this work it must be enabled previously with setsockopt()
+and the PACKET_TX_HAS_OFF option.
+
 --------------------------------------------------------------------------------
 + PACKET_MMAP settings
 --------------------------------------------------------------------------------
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index f379929..f9a6037 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -50,6 +50,7 @@ struct sockaddr_ll {
 #define PACKET_TX_TIMESTAMP		16
 #define PACKET_TIMESTAMP		17
 #define PACKET_FANOUT			18
+#define PACKET_TX_HAS_OFF		19
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 94060ed..7ac7119 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1881,7 +1881,38 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb_reserve(skb, hlen);
 	skb_reset_network_header(skb);
 
-	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+	if (po->tp_tx_has_off) {
+		int off_min, off_max, off;
+		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
+		off_max = po->tx_ring.frame_size - tp_len;
+		if (sock->type == SOCK_DGRAM) {
+			switch (po->tp_version) {
+			case TPACKET_V2:
+				off = ph.h2->tp_net;
+				break;
+			default:
+				off = ph.h1->tp_net;
+				break;
+			}
+		} else {
+			switch (po->tp_version) {
+			case TPACKET_V2:
+				off = ph.h2->tp_mac;
+				break;
+			default:
+				off = ph.h1->tp_mac;
+				break;
+			}
+		}
+		if (unlikely((off < off_min) || (off_max < off))) {
+			pr_err("payload offset (%d) out of range [%d;%d]\n",
+				off, off_min, off_max);
+			return -EINVAL;
+		}
+		data = ph.raw + off;
+	} else {
+		data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
+	}
 	to_write = tp_len;
 
 	if (sock->type == SOCK_DGRAM) {
@@ -3111,6 +3142,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		return fanout_add(sk, val & 0xffff, val >> 16);
 	}
+	case PACKET_TX_HAS_OFF:
+	{
+		unsigned int val;
+
+		if (optlen != sizeof(val))
+			return -EINVAL;
+		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
+			return -EBUSY;
+		if (copy_from_user(&val, optval, sizeof(val)))
+			return -EFAULT;
+		po->tp_tx_has_off = !!val;
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -3202,6 +3246,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			((u32)po->fanout->type << 16)) :
 		       0);
 		break;
+	case PACKET_TX_HAS_OFF:
+		val = po->tp_tx_has_off;
+		break;
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/packet/internal.h b/net/packet/internal.h
index 44945f6..169e60d 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -110,6 +110,7 @@ struct packet_sock {
 	unsigned int		tp_reserve;
 	unsigned int		tp_loss:1;
 	unsigned int		tp_tstamp;
+	unsigned int		tp_tx_has_off:1;
 	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
 };
 
-- 
1.7.12.1

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset
  2012-10-22  6:56 [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset Paul Chavent
@ 2012-10-31 17:14 ` David Miller
  2012-10-31 21:22   ` Paul Chavent
  0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2012-10-31 17:14 UTC (permalink / raw)
  To: Paul.Chavent; +Cc: netdev

From: Paul Chavent <Paul.Chavent@onera.fr>
Date: Mon, 22 Oct 2012 08:56:54 +0200

> +		if (unlikely((off < off_min) || (off_max < off))) {
> +			pr_err("payload offset (%d) out of range [%d;%d]\n",
> +				off, off_min, off_max);
> +			return -EINVAL;
> +		}

Users should not be able to spam the kernel log with error messages
by simply make setsockopt() calls.

This error log is inappropriate.
> @@ -110,6 +110,7 @@ struct packet_sock {
>  	unsigned int		tp_reserve;
>  	unsigned int		tp_loss:1;
>  	unsigned int		tp_tstamp;
> +	unsigned int		tp_tx_has_off:1;
>  	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
>  };

Please put the new bit field next to other existing bit fields so that
there is less wasted space in the struct.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset
  2012-10-31 17:14 ` David Miller
@ 2012-10-31 21:22   ` Paul Chavent
  2012-11-01 14:54     ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Paul Chavent @ 2012-10-31 21:22 UTC (permalink / raw)
  To: David Miller; +Cc: Paul.Chavent, netdev

On 10/31/2012 06:14 PM, David Miller wrote:
> From: Paul Chavent <Paul.Chavent@onera.fr>
> Date: Mon, 22 Oct 2012 08:56:54 +0200
>
>> +		if (unlikely((off < off_min) || (off_max < off))) {
>> +			pr_err("payload offset (%d) out of range [%d;%d]\n",
>> +				off, off_min, off_max);
>> +			return -EINVAL;
>> +		}
>
> Users should not be able to spam the kernel log with error messages
> by simply make setsockopt() calls.
>
> This error log is inappropriate.

This error log is thrown when the user call send, not setsockopt !

It seems that the user can already fire an error msg by simply pass a too long tp_len.

Though, should i remove it from the tpacket_fill_skb function and silently return -EINVAL ?


>> @@ -110,6 +110,7 @@ struct packet_sock {
>>   	unsigned int		tp_reserve;
>>   	unsigned int		tp_loss:1;
>>   	unsigned int		tp_tstamp;
>> +	unsigned int		tp_tx_has_off:1;
>>   	struct packet_type	prot_hook ____cacheline_aligned_in_smp;
>>   };
>
> Please put the new bit field next to other existing bit fields so that
> there is less wasted space in the struct.

OK.

> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset
  2012-10-31 21:22   ` Paul Chavent
@ 2012-11-01 14:54     ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2012-11-01 14:54 UTC (permalink / raw)
  To: paul.chavent; +Cc: Paul.Chavent, netdev

From: Paul Chavent <paul.chavent@fnac.net>
Date: Wed, 31 Oct 2012 22:22:07 +0100

> It seems that the user can already fire an error msg by simply pass a
> too long tp_len.
> 
> Though, should i remove it from the tpacket_fill_skb function and
> silently return -EINVAL ?

Returning an error is much better than emitting log messages,
always.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2012-11-01 14:54 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-10-22  6:56 [PATCH net-next v2] packet: tx_ring: allow the user to choose tx data offset Paul Chavent
2012-10-31 17:14 ` David Miller
2012-10-31 21:22   ` Paul Chavent
2012-11-01 14:54     ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).