Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v2 02/10] net: qualcomm: rmnet: Remove invalid condition while stamping mux id
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:42 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

rmnet devices cannot have a mux id of 255. This is validated when
assigning the mux id to the rmnet devices. As a result, checking for
mux id 255 does not apply in egress path.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 0553932..b2d317e3 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -143,10 +143,7 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 	if (!map_header)
 		goto fail;
 
-	if (mux_id == 0xff)
-		map_header->mux_id = 0;
-	else
-		map_header->mux_id = mux_id;
+	map_header->mux_id = mux_id;
 
 	skb->protocol = htons(ETH_P_MAP);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 03/10] net: qualcomm: rmnet: Remove unused function declaration
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

rmnet_map_demultiplex() is only declared but not defined anywhere,
so remove it.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 4df359d..ef0eff2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -67,7 +67,6 @@ struct rmnet_map_header {
 #define RMNET_MAP_NO_PAD_BYTES        0
 #define RMNET_MAP_ADD_PAD_BYTES       1
 
-u8 rmnet_map_demultiplex(struct sk_buff *skb);
 struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb);
 struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 						  int hdrlen, int pad);
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 04/10] net: qualcomm: rmnet: Rename ingress data format to data format
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

This is done so that we can use this field for both ingress and
egress flags.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c   | 10 +++++-----
 drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h   |  2 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c |  5 ++---
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
index cedacdd..7e7704d 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.c
@@ -143,7 +143,7 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 			 struct nlattr *tb[], struct nlattr *data[],
 			 struct netlink_ext_ack *extack)
 {
-	int ingress_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
+	u32 data_format = RMNET_INGRESS_FORMAT_DEAGGREGATION;
 	struct net_device *real_dev;
 	int mode = RMNET_EPMODE_VND;
 	struct rmnet_endpoint *ep;
@@ -185,11 +185,11 @@ static int rmnet_newlink(struct net *src_net, struct net_device *dev,
 		struct ifla_vlan_flags *flags;
 
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
-		ingress_format = flags->flags & flags->mask;
+		data_format = flags->flags & flags->mask;
 	}
 
-	netdev_dbg(dev, "data format [ingress 0x%08X]\n", ingress_format);
-	port->ingress_data_format = ingress_format;
+	netdev_dbg(dev, "data format [0x%08X]\n", data_format);
+	port->data_format = data_format;
 
 	return 0;
 
@@ -353,7 +353,7 @@ static int rmnet_changelink(struct net_device *dev, struct nlattr *tb[],
 		struct ifla_vlan_flags *flags;
 
 		flags = nla_data(data[IFLA_VLAN_FLAGS]);
-		port->ingress_data_format = flags->flags & flags->mask;
+		port->data_format = flags->flags & flags->mask;
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
index 2ea9fe3..00e4634 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_config.h
@@ -32,7 +32,7 @@ struct rmnet_endpoint {
  */
 struct rmnet_port {
 	struct net_device *dev;
-	u32 ingress_data_format;
+	u32 data_format;
 	u8 nr_rmnet_devs;
 	u8 rmnet_mode;
 	struct hlist_head muxed_ep[RMNET_MAX_LOGICAL_EP];
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index b2d317e3..8e1f43a 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -69,8 +69,7 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 	u16 len;
 
 	if (RMNET_MAP_GET_CD_BIT(skb)) {
-		if (port->ingress_data_format
-		    & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
+		if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
 			return rmnet_map_command(skb, port);
 
 		goto free_skb;
@@ -114,7 +113,7 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 		skb_push(skb, ETH_HLEN);
 	}
 
-	if (port->ingress_data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
+	if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
 		while ((skbn = rmnet_map_deaggregate(skb)) != NULL)
 			__rmnet_map_ingress_handler(skbn, port);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 05/10] net: qualcomm: rmnet: Set pacing rate
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

With a default pacing rate of 10, the uplink data rate for a single
TCP stream is around 10Mbps. Setting it to 8 increases it to 146Mbps
which is the maximum supported transmit rate.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 8e1f43a..8f8c4f2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -16,6 +16,7 @@
 #include <linux/netdevice.h>
 #include <linux/netdev_features.h>
 #include <linux/if_arp.h>
+#include <net/sock.h>
 #include "rmnet_private.h"
 #include "rmnet_config.h"
 #include "rmnet_vnd.h"
@@ -204,6 +205,8 @@ void rmnet_egress_handler(struct sk_buff *skb)
 	struct rmnet_priv *priv;
 	u8 mux_id;
 
+	sk_pacing_shift_update(skb->sk, 8);
+
 	orig_dev = skb->dev;
 	priv = netdev_priv(orig_dev);
 	skb->dev = priv->real_dev;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 06/10] net: qualcomm: rmnet: Define the MAPv4 packet formats
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

The MAPv4 packet format adds support for RX / TX checksum offload.
For a bi-directional UDP stream at a rate of 570 / 146 Mbps, roughly
10% CPU cycles are saved.

For receive path, there is a checksum trailer appended to the end of
the MAP packet. The valid field indicates if hardware has computed
the checksum. csum_start_offset indicates the offset from the start
of the IP header from which hardware has computed checksum.
csum_length is the number of bytes over which the checksum was
computed and the resulting value is csum_value.

In the transmit path, a header is appended between the end of the MAP
header and the start of the IP packet. csum_start_offset is the offset
in bytes from which hardware will compute the checksum if the
csum_enabled bit is set. udp_ip4_ind indicates if the checksum
value of 0 is valid or not. csum_insert_offset is the offset from the
csum_start_offset where hardware will insert the computed checksum.

The use of this additional packet format for checksum offload is
explained in subsequent patches.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h     | 16 ++++++++++++++++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index ef0eff2..50c50cd 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -47,6 +47,22 @@ struct rmnet_map_header {
 	u16 pkt_len;
 }  __aligned(1);
 
+struct rmnet_map_dl_csum_trailer {
+	u8  reserved1;
+	u8  valid:1;
+	u8  reserved2:7;
+	u16 csum_start_offset;
+	u16 csum_length;
+	__be16 csum_value;
+} __aligned(1);
+
+struct rmnet_map_ul_csum_header {
+	__be16 csum_start_offset;
+	u16 csum_insert_offset:14;
+	u16 udp_ip4_ind:1;
+	u16 csum_enabled:1;
+} __aligned(1);
+
 #define RMNET_MAP_GET_MUX_ID(Y) (((struct rmnet_map_header *) \
 				 (Y)->data)->mux_id)
 #define RMNET_MAP_GET_CD_BIT(Y) (((struct rmnet_map_header *) \
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
index d214280..de0143e 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_private.h
@@ -21,6 +21,8 @@
 /* Constants */
 #define RMNET_INGRESS_FORMAT_DEAGGREGATION      BIT(0)
 #define RMNET_INGRESS_FORMAT_MAP_COMMANDS       BIT(1)
+#define RMNET_INGRESS_FORMAT_MAP_CKSUMV4        BIT(2)
+#define RMNET_EGRESS_FORMAT_MAP_CKSUMV4         BIT(3)
 
 /* Replace skb->dev to a virtual rmnet device and pass up the stack */
 #define RMNET_EPMODE_VND (1)
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 07/10] net: qualcomm: rmnet: Add support for RX checksum offload
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

When using the MAPv4 packet format, receive checksum offload can be
enabled in hardware. The checksum computation over pseudo header is
not offloaded but the rest of the checksum computation over
the payload is offloaded. This applies only for TCP / UDP packets
which are not fragmented.

rmnet validates the TCP/UDP checksum for the packet using the checksum
from the checksum trailer added to the packet by hardware. The
validation performed is as following -

1. Perform 1's complement over the checksum value from the trailer
2. Compute 1's complement checksum over IPv4 / IPv6 header and
   subtracts it from the value from step 1
3. Computes 1's complement checksum over IPv4 / IPv6 pseudo header and
   adds it to the value from step 2
4. Subtracts the checksum value from the TCP / UDP header from the
   value from step 3.
5. Compares the value from step 4 to the checksum value from the
   TCP / UDP header.
6. If the comparison in step 5 succeeds, CHECKSUM_UNNECESSARY is set
   and the packet is passed on to network stack. If there is a
   failure, then the packet is passed on as such without modifying
   the ip_summed field.

The checksum field is also checked for UDP checksum 0 as per RFC 768
and for unexpected TCP checksum of 0.

If checksum offload is disabled when using MAPv4 packet format in
receive path, the packet is queued as is to network stack without
the validations above.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   |  15 +-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h    |   4 +-
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 186 ++++++++++++++++++++-
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c    |   2 +
 4 files changed, 201 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 8f8c4f2..3409458 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -66,8 +66,8 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 			    struct rmnet_port *port)
 {
 	struct rmnet_endpoint *ep;
+	u16 len, pad;
 	u8 mux_id;
-	u16 len;
 
 	if (RMNET_MAP_GET_CD_BIT(skb)) {
 		if (port->data_format & RMNET_INGRESS_FORMAT_MAP_COMMANDS)
@@ -77,7 +77,8 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 	}
 
 	mux_id = RMNET_MAP_GET_MUX_ID(skb);
-	len = RMNET_MAP_GET_LENGTH(skb) - RMNET_MAP_GET_PAD(skb);
+	pad = RMNET_MAP_GET_PAD(skb);
+	len = RMNET_MAP_GET_LENGTH(skb) - pad;
 
 	if (mux_id >= RMNET_MAX_LOGICAL_EP)
 		goto free_skb;
@@ -90,8 +91,14 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 
 	/* Subtract MAP header */
 	skb_pull(skb, sizeof(struct rmnet_map_header));
-	skb_trim(skb, len);
 	rmnet_set_skb_proto(skb);
+
+	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+		if (!rmnet_map_checksum_downlink_packet(skb, len + pad))
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
+	skb_trim(skb, len);
 	rmnet_deliver_skb(skb);
 	return;
 
@@ -115,7 +122,7 @@ static void rmnet_set_skb_proto(struct sk_buff *skb)
 	}
 
 	if (port->data_format & RMNET_INGRESS_FORMAT_DEAGGREGATION) {
-		while ((skbn = rmnet_map_deaggregate(skb)) != NULL)
+		while ((skbn = rmnet_map_deaggregate(skb, port)) != NULL)
 			__rmnet_map_ingress_handler(skbn, port);
 
 		consume_skb(skb);
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index 50c50cd..ca9f473 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -83,9 +83,11 @@ struct rmnet_map_ul_csum_header {
 #define RMNET_MAP_NO_PAD_BYTES        0
 #define RMNET_MAP_ADD_PAD_BYTES       1
 
-struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb);
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
+				      struct rmnet_port *port);
 struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 						  int hdrlen, int pad);
 void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port);
+int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 978ce26..881c1dc 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -14,6 +14,9 @@
  */
 
 #include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
 #include "rmnet_config.h"
 #include "rmnet_map.h"
 #include "rmnet_private.h"
@@ -21,6 +24,153 @@
 #define RMNET_MAP_DEAGGR_SPACING  64
 #define RMNET_MAP_DEAGGR_HEADROOM (RMNET_MAP_DEAGGR_SPACING / 2)
 
+static __sum16 *rmnet_map_get_csum_field(unsigned char protocol,
+					 const void *txporthdr)
+{
+	__sum16 *check = NULL;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		check = &(((struct tcphdr *)txporthdr)->check);
+		break;
+
+	case IPPROTO_UDP:
+		check = &(((struct udphdr *)txporthdr)->check);
+		break;
+
+	default:
+		check = NULL;
+		break;
+	}
+
+	return check;
+}
+
+static int
+rmnet_map_ipv4_dl_csum_trailer(struct sk_buff *skb,
+			       struct rmnet_map_dl_csum_trailer *csum_trailer)
+{
+	__sum16 *csum_field, csum_temp, pseudo_csum, hdr_csum, ip_payload_csum;
+	u16 csum_value, csum_value_final;
+	struct iphdr *ip4h;
+	void *txporthdr;
+	__be16 addend;
+
+	ip4h = (struct iphdr *)(skb->data);
+	if ((ntohs(ip4h->frag_off) & IP_MF) ||
+	    ((ntohs(ip4h->frag_off) & IP_OFFSET) > 0))
+		return -EOPNOTSUPP;
+
+	txporthdr = skb->data + ip4h->ihl * 4;
+
+	csum_field = rmnet_map_get_csum_field(ip4h->protocol, txporthdr);
+
+	if (!csum_field)
+		return -EPROTONOSUPPORT;
+
+	/* RFC 768 - Skip IPv4 UDP packets where sender checksum field is 0 */
+	if (*csum_field == 0 && ip4h->protocol == IPPROTO_UDP)
+		return 0;
+
+	csum_value = ~ntohs(csum_trailer->csum_value);
+	hdr_csum = ~ip_fast_csum(ip4h, (int)ip4h->ihl);
+	ip_payload_csum = csum16_sub((__force __sum16)csum_value,
+				     (__force __be16)hdr_csum);
+
+	pseudo_csum = ~csum_tcpudp_magic(ip4h->saddr, ip4h->daddr,
+					 ntohs(ip4h->tot_len) - ip4h->ihl * 4,
+					 ip4h->protocol, 0);
+	addend = (__force __be16)ntohs((__force __be16)pseudo_csum);
+	pseudo_csum = csum16_add(ip_payload_csum, addend);
+
+	addend = (__force __be16)ntohs((__force __be16)*csum_field);
+	csum_temp = ~csum16_sub(pseudo_csum, addend);
+	csum_value_final = (__force u16)csum_temp;
+
+	if (unlikely(csum_value_final == 0)) {
+		switch (ip4h->protocol) {
+		case IPPROTO_UDP:
+			/* RFC 768 - DL4 1's complement rule for UDP csum 0 */
+			csum_value_final = ~csum_value_final;
+			break;
+
+		case IPPROTO_TCP:
+			/* DL4 Non-RFC compliant TCP checksum found */
+			if (*csum_field == (__force __sum16)0xFFFF)
+				csum_value_final = ~csum_value_final;
+			break;
+		}
+	}
+
+	if (csum_value_final == ntohs((__force __be16)*csum_field))
+		return 0;
+	else
+		return -EINVAL;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static int
+rmnet_map_ipv6_dl_csum_trailer(struct sk_buff *skb,
+			       struct rmnet_map_dl_csum_trailer *csum_trailer)
+{
+	__sum16 *csum_field, ip6_payload_csum, pseudo_csum, csum_temp;
+	u16 csum_value, csum_value_final;
+	__be16 ip6_hdr_csum, addend;
+	struct ipv6hdr *ip6h;
+	void *txporthdr;
+	u32 length;
+
+	ip6h = (struct ipv6hdr *)(skb->data);
+
+	txporthdr = skb->data + sizeof(struct ipv6hdr);
+	csum_field = rmnet_map_get_csum_field(ip6h->nexthdr, txporthdr);
+
+	if (!csum_field)
+		return -EPROTONOSUPPORT;
+
+	csum_value = ~ntohs(csum_trailer->csum_value);
+	ip6_hdr_csum = (__force __be16)
+			~ntohs((__force __be16)ip_compute_csum(ip6h,
+			       (int)(txporthdr - (void *)(skb->data))));
+	ip6_payload_csum = csum16_sub((__force __sum16)csum_value,
+				      ip6_hdr_csum);
+
+	length = (ip6h->nexthdr == IPPROTO_UDP) ?
+		 ntohs(((struct udphdr *)txporthdr)->len) :
+		 ntohs(ip6h->payload_len);
+	pseudo_csum = ~(csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+			     length, ip6h->nexthdr, 0));
+	addend = (__force __be16)ntohs((__force __be16)pseudo_csum);
+	pseudo_csum = csum16_add(ip6_payload_csum, addend);
+
+	addend = (__force __be16)ntohs((__force __be16)*csum_field);
+	csum_temp = ~csum16_sub(pseudo_csum, addend);
+	csum_value_final = (__force u16)csum_temp;
+
+	if (unlikely(csum_value_final == 0)) {
+		switch (ip6h->nexthdr) {
+		case IPPROTO_UDP:
+			/* RFC 2460 section 8.1
+			 * DL6 One's complement rule for UDP checksum 0
+			 */
+			csum_value_final = ~csum_value_final;
+			break;
+
+		case IPPROTO_TCP:
+			/* DL6 Non-RFC compliant TCP checksum found */
+			if (*csum_field == (__force __sum16)0xFFFF)
+				csum_value_final = ~csum_value_final;
+			break;
+		}
+	}
+
+	if (csum_value_final == ntohs((__force __be16)*csum_field))
+		return 0;
+	else
+		return -EINVAL;
+}
+#endif
+
 /* Adds MAP header to front of skb->data
  * Padding is calculated and set appropriately in MAP header. Mux ID is
  * initialized to 0.
@@ -66,7 +216,8 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
  * returned, indicating that there are no more packets to deaggregate. Caller
  * is responsible for freeing the original skb.
  */
-struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb)
+struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb,
+				      struct rmnet_port *port)
 {
 	struct rmnet_map_header *maph;
 	struct sk_buff *skbn;
@@ -78,6 +229,9 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb)
 	maph = (struct rmnet_map_header *)skb->data;
 	packet_len = ntohs(maph->pkt_len) + sizeof(struct rmnet_map_header);
 
+	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4)
+		packet_len += sizeof(struct rmnet_map_dl_csum_trailer);
+
 	if (((int)skb->len - (int)packet_len) < 0)
 		return NULL;
 
@@ -97,3 +251,33 @@ struct sk_buff *rmnet_map_deaggregate(struct sk_buff *skb)
 
 	return skbn;
 }
+
+/* Validates packet checksums. Function takes a pointer to
+ * the beginning of a buffer which contains the IP payload +
+ * padding + checksum trailer.
+ * Only IPv4 and IPv6 are supported along with TCP & UDP.
+ * Fragmented or tunneled packets are not supported.
+ */
+int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
+{
+	struct rmnet_map_dl_csum_trailer *csum_trailer;
+
+	if (unlikely(!(skb->dev->features & NETIF_F_RXCSUM)))
+		return -EOPNOTSUPP;
+
+	csum_trailer = (struct rmnet_map_dl_csum_trailer *)(skb->data + len);
+
+	if (!csum_trailer->valid)
+		return -EINVAL;
+
+	if (skb->protocol == htons(ETH_P_IP))
+		return rmnet_map_ipv4_dl_csum_trailer(skb, csum_trailer);
+	else if (skb->protocol == htons(ETH_P_IPV6))
+#if IS_ENABLED(CONFIG_IPV6)
+		return rmnet_map_ipv6_dl_csum_trailer(skb, csum_trailer);
+#else
+		return -EPROTONOSUPPORT;
+#endif
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 5bb29f4..879a2e0 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -188,6 +188,8 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 	if (rmnet_get_endpoint(port, id))
 		return -EBUSY;
 
+	rmnet_dev->hw_features = NETIF_F_RXCSUM;
+
 	rc = register_netdevice(rmnet_dev);
 	if (!rc) {
 		ep->egress_dev = rmnet_dev;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 08/10] net: qualcomm: rmnet: Handle command packets with checksum trailer
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

When using the MAPv4 packet format in conjunction with MAP commands,
a dummy DL checksum trailer will be appended to the packet. Before
this packet is sent out as an ACK, the DL checksum trailer needs to be
removed.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
index 51e6049..6bc328f 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_command.c
@@ -58,11 +58,24 @@ static u8 rmnet_map_do_flow_control(struct sk_buff *skb,
 }
 
 static void rmnet_map_send_ack(struct sk_buff *skb,
-			       unsigned char type)
+			       unsigned char type,
+			       struct rmnet_port *port)
 {
 	struct rmnet_map_control_command *cmd;
 	int xmit_status;
 
+	if (port->data_format & RMNET_INGRESS_FORMAT_MAP_CKSUMV4) {
+		if (skb->len < sizeof(struct rmnet_map_header) +
+		    RMNET_MAP_GET_LENGTH(skb) +
+		    sizeof(struct rmnet_map_dl_csum_trailer)) {
+			kfree_skb(skb);
+			return;
+		}
+
+		skb_trim(skb, skb->len -
+			 sizeof(struct rmnet_map_dl_csum_trailer));
+	}
+
 	skb->protocol = htons(ETH_P_MAP);
 
 	cmd = RMNET_MAP_GET_CMD_START(skb);
@@ -100,5 +113,5 @@ void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port)
 		break;
 	}
 	if (rc == RMNET_MAP_COMMAND_ACK)
-		rmnet_map_send_ack(skb, rc);
+		rmnet_map_send_ack(skb, rc, port);
 }
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 10/10] net: qualcomm: rmnet: Add support for GSO
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

Real devices may support scatter gather(SG), so enable SG on rmnet
devices to use GSO. GSO reduces CPU cycles by 20% for a rate of
146Mpbs for a single stream TCP connection.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index f7f57ce..570a227 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -190,6 +190,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 
 	rmnet_dev->hw_features = NETIF_F_RXCSUM;
 	rmnet_dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
+	rmnet_dev->hw_features |= NETIF_F_SG;
 
 	rc = register_netdevice(rmnet_dev);
 	if (!rc) {
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next v2 09/10] net: qualcomm: rmnet: Add support for TX checksum offload
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 21:43 UTC (permalink / raw)
  To: davem, netdev, lkp; +Cc: Subash Abhinov Kasiviswanathan
In-Reply-To: <1515015787-6713-1-git-send-email-subashab@codeaurora.org>

TX checksum offload applies to TCP / UDP packets which are not
fragmented using the MAPv4 checksum trailer. The following needs to be
done to have checksum computed in hardware -

1. Set the checksum start offset and inset offset.
2. Set the csum_enabled bit
3. Compute and set 1's complement of partial checksum field in
   transport header.

Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
---
 .../net/ethernet/qualcomm/rmnet/rmnet_handlers.c   |   8 ++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h    |   2 +
 .../net/ethernet/qualcomm/rmnet/rmnet_map_data.c   | 120 +++++++++++++++++++++
 drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c    |   1 +
 4 files changed, 131 insertions(+)

diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
index 3409458..601edec 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
@@ -141,11 +141,19 @@ static int rmnet_map_egress_handler(struct sk_buff *skb,
 	additional_header_len = 0;
 	required_headroom = sizeof(struct rmnet_map_header);
 
+	if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4) {
+		additional_header_len = sizeof(struct rmnet_map_ul_csum_header);
+		required_headroom += additional_header_len;
+	}
+
 	if (skb_headroom(skb) < required_headroom) {
 		if (pskb_expand_head(skb, required_headroom, 0, GFP_KERNEL))
 			goto fail;
 	}
 
+	if (port->data_format & RMNET_EGRESS_FORMAT_MAP_CKSUMV4)
+		rmnet_map_checksum_uplink_packet(skb, orig_dev);
+
 	map_header = rmnet_map_add_map_header(skb, additional_header_len, 0);
 	if (!map_header)
 		goto fail;
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
index ca9f473..6ce31e2 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map.h
@@ -89,5 +89,7 @@ struct rmnet_map_header *rmnet_map_add_map_header(struct sk_buff *skb,
 						  int hdrlen, int pad);
 void rmnet_map_command(struct sk_buff *skb, struct rmnet_port *port);
 int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len);
+void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
+				      struct net_device *orig_dev);
 
 #endif /* _RMNET_MAP_H_ */
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
index 881c1dc..c74a6c5 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_map_data.c
@@ -171,6 +171,86 @@ static __sum16 *rmnet_map_get_csum_field(unsigned char protocol,
 }
 #endif
 
+static void rmnet_map_complement_ipv4_txporthdr_csum_field(void *iphdr)
+{
+	struct iphdr *ip4h = (struct iphdr *)iphdr;
+	void *txphdr;
+	u16 *csum;
+
+	txphdr = iphdr + ip4h->ihl * 4;
+
+	if (ip4h->protocol == IPPROTO_TCP || ip4h->protocol == IPPROTO_UDP) {
+		csum = (u16 *)rmnet_map_get_csum_field(ip4h->protocol, txphdr);
+		*csum = ~(*csum);
+	}
+}
+
+static void
+rmnet_map_ipv4_ul_csum_header(void *iphdr,
+			      struct rmnet_map_ul_csum_header *ul_header,
+			      struct sk_buff *skb)
+{
+	struct iphdr *ip4h = (struct iphdr *)iphdr;
+	__be16 *hdr = (__be16 *)ul_header, offset;
+
+	offset = htons((__force u16)(skb_transport_header(skb) -
+				     (unsigned char *)iphdr));
+	ul_header->csum_start_offset = offset;
+	ul_header->csum_insert_offset = skb->csum_offset;
+	ul_header->csum_enabled = 1;
+	if (ip4h->protocol == IPPROTO_UDP)
+		ul_header->udp_ip4_ind = 1;
+	else
+		ul_header->udp_ip4_ind = 0;
+
+	/* Changing remaining fields to network order */
+	hdr++;
+	*hdr = htons((__force u16)*hdr);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	rmnet_map_complement_ipv4_txporthdr_csum_field(iphdr);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+static void rmnet_map_complement_ipv6_txporthdr_csum_field(void *ip6hdr)
+{
+	struct ipv6hdr *ip6h = (struct ipv6hdr *)ip6hdr;
+	void *txphdr;
+	u16 *csum;
+
+	txphdr = ip6hdr + sizeof(struct ipv6hdr);
+
+	if (ip6h->nexthdr == IPPROTO_TCP || ip6h->nexthdr == IPPROTO_UDP) {
+		csum = (u16 *)rmnet_map_get_csum_field(ip6h->nexthdr, txphdr);
+		*csum = ~(*csum);
+	}
+}
+
+static void
+rmnet_map_ipv6_ul_csum_header(void *ip6hdr,
+			      struct rmnet_map_ul_csum_header *ul_header,
+			      struct sk_buff *skb)
+{
+	__be16 *hdr = (__be16 *)ul_header, offset;
+
+	offset = htons((__force u16)(skb_transport_header(skb) -
+				     (unsigned char *)ip6hdr));
+	ul_header->csum_start_offset = offset;
+	ul_header->csum_insert_offset = skb->csum_offset;
+	ul_header->csum_enabled = 1;
+	ul_header->udp_ip4_ind = 0;
+
+	/* Changing remaining fields to network order */
+	hdr++;
+	*hdr = htons((__force u16)*hdr);
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	rmnet_map_complement_ipv6_txporthdr_csum_field(ip6hdr);
+}
+#endif
+
 /* Adds MAP header to front of skb->data
  * Padding is calculated and set appropriately in MAP header. Mux ID is
  * initialized to 0.
@@ -281,3 +361,43 @@ int rmnet_map_checksum_downlink_packet(struct sk_buff *skb, u16 len)
 
 	return 0;
 }
+
+/* Generates UL checksum meta info header for IPv4 and IPv6 over TCP and UDP
+ * packets that are supported for UL checksum offload.
+ */
+void rmnet_map_checksum_uplink_packet(struct sk_buff *skb,
+				      struct net_device *orig_dev)
+{
+	struct rmnet_map_ul_csum_header *ul_header;
+	void *iphdr;
+
+	ul_header = (struct rmnet_map_ul_csum_header *)
+		    skb_push(skb, sizeof(struct rmnet_map_ul_csum_header));
+
+	if (unlikely(!(orig_dev->features &
+		     (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))))
+		goto sw_csum;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		iphdr = (char *)ul_header +
+			sizeof(struct rmnet_map_ul_csum_header);
+
+		if (skb->protocol == htons(ETH_P_IP)) {
+			rmnet_map_ipv4_ul_csum_header(iphdr, ul_header, skb);
+			return;
+		} else if (skb->protocol == htons(ETH_P_IPV6)) {
+#if IS_ENABLED(CONFIG_IPV6)
+			rmnet_map_ipv6_ul_csum_header(iphdr, ul_header, skb);
+			return;
+#else
+			goto sw_csum;
+#endif
+		}
+	}
+
+sw_csum:
+	ul_header->csum_start_offset = 0;
+	ul_header->csum_insert_offset = 0;
+	ul_header->csum_enabled = 0;
+	ul_header->udp_ip4_ind = 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
index 879a2e0..f7f57ce 100644
--- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
+++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_vnd.c
@@ -189,6 +189,7 @@ int rmnet_vnd_newlink(u8 id, struct net_device *rmnet_dev,
 		return -EBUSY;
 
 	rmnet_dev->hw_features = NETIF_F_RXCSUM;
+	rmnet_dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
 
 	rc = register_netdevice(rmnet_dev);
 	if (!rc) {
-- 
1.9.1

^ permalink raw reply related

* Re: bonding: Delete an error message for a failed memory allocation in bond_update_slave_arr()
From: Eric Dumazet @ 2018-01-03 21:58 UTC (permalink / raw)
  To: Mahesh Bandewar (महेश बंडेवार),
	SF Markus Elfring
  Cc: linux-netdev, Andy Gospodarek, Jay Vosburgh, Veaceslav Falico,
	LKML, kernel-janitors
In-Reply-To: <CAF2d9jh1EHUx3BzWghBWRAZt4acO6W-_eUXtD7dk2YBJZfuwsg@mail.gmail.com>

On Wed, 2018-01-03 at 11:28 -0800, Mahesh Bandewar (महेश बंडेवार)
wrote:
> On Wed, Jan 3, 2018 at 12:45 AM, SF Markus Elfring
> <elfring@users.sourceforge.net> wrote:
> > > > Omit an extra message for a memory allocation failure in this function.
> > > > 
> > > > This issue was detected by using the Coccinelle software.
> > > > 
> > > 
> > > What is the issue with this message?
> > 
> > * Is it redundant?
> > 
> > * Would a Linux allocation failure report be already sufficient here?
> > 
> 
> If you see 8 out of 9 call sites in this file ignore the return value.
> This message in the log could give a clue when debugging. Unless it's
> spamming it's not harmful, or is it?

A failed kzalloc() would already give a complete stack trace.

Really the pr_err() adds no value here.



^ permalink raw reply

* Re: [PATCH net-next v2 05/10] net: qualcomm: rmnet: Set pacing rate
From: Eric Dumazet @ 2018-01-03 22:01 UTC (permalink / raw)
  To: Subash Abhinov Kasiviswanathan, davem, netdev, lkp
In-Reply-To: <1515015787-6713-6-git-send-email-subashab@codeaurora.org>

On Wed, 2018-01-03 at 14:43 -0700, Subash Abhinov Kasiviswanathan
wrote:
> With a default pacing rate of 10, the uplink data rate for a single
> TCP stream is around 10Mbps. Setting it to 8 increases it to 146Mbps
> which is the maximum supported transmit rate.
> 
> Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
> ---
>  drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> index 8e1f43a..8f8c4f2 100644
> --- a/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> +++ b/drivers/net/ethernet/qualcomm/rmnet/rmnet_handlers.c
> @@ -16,6 +16,7 @@
>  #include <linux/netdevice.h>
>  #include <linux/netdev_features.h>
>  #include <linux/if_arp.h>
> +#include <net/sock.h>
>  #include "rmnet_private.h"
>  #include "rmnet_config.h"
>  #include "rmnet_vnd.h"
> @@ -204,6 +205,8 @@ void rmnet_egress_handler(struct sk_buff *skb)
>  	struct rmnet_priv *priv;
>  	u8 mux_id;
>  
> +	sk_pacing_shift_update(skb->sk, 8);

Well... Please tell us why this is needed in this driver.

This interface is meant for wifi aggregation, not to work around some
strange ethernet drivers designs.

^ permalink raw reply

* [PATCH net] ipv6: fix general protection fault in fib6_add()
From: Wei Wang @ 2018-01-03 22:11 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: Martin KaFai Lau, David Ahern, Wei Wang

From: Wei Wang <weiwan@google.com>

In fib6_add(), pn could be NULL if fib6_add_1() failed to return a fib6
node. Checking pn != fn before accessing pn->leaf makes sure pn is not
NULL.
This fixes the following GPF reported by syzkaller:
general protection fault: 0000 [#1] SMP KASAN
Dumping ftrace buffer:
   (ftrace buffer empty)
Modules linked in:
CPU: 0 PID: 3201 Comm: syzkaller001778 Not tainted 4.15.0-rc5+ #151
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
RIP: 0010:fib6_add+0x736/0x15a0 net/ipv6/ip6_fib.c:1244
RSP: 0018:ffff8801c7626a70 EFLAGS: 00010202
RAX: dffffc0000000000 RBX: 0000000000000020 RCX: ffffffff84794465
RDX: 0000000000000004 RSI: ffff8801d38935f0 RDI: 0000000000000282
RBP: ffff8801c7626da0 R08: 1ffff10038ec4c35 R09: 0000000000000000
R10: ffff8801c7626c68 R11: 0000000000000000 R12: 00000000fffffffe
R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000009
FS:  0000000000000000(0000) GS:ffff8801db200000(0063) knlGS:0000000009b70840
CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
CR2: 0000000020be1000 CR3: 00000001d585a006 CR4: 00000000001606f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 __ip6_ins_rt+0x6c/0x90 net/ipv6/route.c:1006
 ip6_route_multipath_add+0xd14/0x16c0 net/ipv6/route.c:3833
 inet6_rtm_newroute+0xdc/0x160 net/ipv6/route.c:3957
 rtnetlink_rcv_msg+0x733/0x1020 net/core/rtnetlink.c:4411
 netlink_rcv_skb+0x21e/0x460 net/netlink/af_netlink.c:2408
 rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4423
 netlink_unicast_kernel net/netlink/af_netlink.c:1275 [inline]
 netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1301
 netlink_sendmsg+0xa4a/0xe60 net/netlink/af_netlink.c:1864
 sock_sendmsg_nosec net/socket.c:636 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:646
 sock_write_iter+0x31a/0x5d0 net/socket.c:915
 call_write_iter include/linux/fs.h:1772 [inline]
 do_iter_readv_writev+0x525/0x7f0 fs/read_write.c:653
 do_iter_write+0x154/0x540 fs/read_write.c:932
 compat_writev+0x225/0x420 fs/read_write.c:1246
 do_compat_writev+0x115/0x220 fs/read_write.c:1267
 C_SYSC_writev fs/read_write.c:1278 [inline]
 compat_SyS_writev+0x26/0x30 fs/read_write.c:1274
 do_syscall_32_irqs_on arch/x86/entry/common.c:327 [inline]
 do_fast_syscall_32+0x3ee/0xf9d arch/x86/entry/common.c:389
 entry_SYSENTER_compat+0x54/0x63 arch/x86/entry/entry_64_compat.S:125

Reported-by: syzbot <syzkaller@googlegroups.com>
Fixes: 66f5d6ce53e6 ("ipv6: replace rwlock with rcu and spinlock in fib6_table")
Signed-off-by: Wei Wang <weiwan@google.com>
---
 net/ipv6/ip6_fib.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f5285f4e1d08..d11a5578e4f8 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1241,23 +1241,28 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
 		 * If fib6_add_1 has cleared the old leaf pointer in the
 		 * super-tree leaf node we have to find a new one for it.
 		 */
-		struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf,
-					    lockdep_is_held(&table->tb6_lock));
-		if (pn != fn && pn_leaf == rt) {
-			pn_leaf = NULL;
-			RCU_INIT_POINTER(pn->leaf, NULL);
-			atomic_dec(&rt->rt6i_ref);
-		}
-		if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
-			pn_leaf = fib6_find_prefix(info->nl_net, table, pn);
-#if RT6_DEBUG >= 2
-			if (!pn_leaf) {
-				WARN_ON(!pn_leaf);
-				pn_leaf = info->nl_net->ipv6.ip6_null_entry;
+		if (pn != fn) {
+			struct rt6_info *pn_leaf =
+				rcu_dereference_protected(pn->leaf,
+				    lockdep_is_held(&table->tb6_lock));
+			if (pn_leaf == rt) {
+				pn_leaf = NULL;
+				RCU_INIT_POINTER(pn->leaf, NULL);
+				atomic_dec(&rt->rt6i_ref);
 			}
+			if (!pn_leaf && !(pn->fn_flags & RTN_RTINFO)) {
+				pn_leaf = fib6_find_prefix(info->nl_net, table,
+							   pn);
+#if RT6_DEBUG >= 2
+				if (!pn_leaf) {
+					WARN_ON(!pn_leaf);
+					pn_leaf =
+					    info->nl_net->ipv6.ip6_null_entry;
+				}
 #endif
-			atomic_inc(&pn_leaf->rt6i_ref);
-			rcu_assign_pointer(pn->leaf, pn_leaf);
+				atomic_inc(&pn_leaf->rt6i_ref);
+				rcu_assign_pointer(pn->leaf, pn_leaf);
+			}
 		}
 #endif
 		goto failure;
-- 
2.16.0.rc0.223.g4a4ac83678-goog

^ permalink raw reply related

* uapi/if_ether.h: prevent redefinition of struct ethhdr
From: Hauke Mehrtens @ 2018-01-03 22:14 UTC (permalink / raw)
  To: davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, linux-api-u79uwXL29TY76Z2rM5mHXA,
	musl-ZwoEplunGu1jrUoiu81ncdBPR1lH4CV8,
	felix.janda-1KBjaw7Xf1+zQB+pC5nmwQ,
	f.fainelli-Re5JQEeQqe8AvxtiuMwx3w, Hauke Mehrtens

Musl provides its own ethhdr struct definition. Add a guard to prevent
its definition of the appropriate musl header has already been included.

glibc does not implement this header, but when glibc will implement this
they can just define __UAPI_DEF_ETHHDR 0 to make it work with the
kernel.

Signed-off-by: Hauke Mehrtens <hauke-5/S+JYg5SzeELgA04lAiVw@public.gmane.org>
---
 include/uapi/linux/if_ether.h    | 3 +++
 include/uapi/linux/libc-compat.h | 6 ++++++
 2 files changed, 9 insertions(+)

diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index 3ee3bf7c8526..144de4d2f385 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -23,6 +23,7 @@
 #define _UAPI_LINUX_IF_ETHER_H
 
 #include <linux/types.h>
+#include <linux/libc-compat.h>
 
 /*
  *	IEEE 802.3 Ethernet magic constants.  The frame sizes omit the preamble
@@ -149,11 +150,13 @@
  *	This is an Ethernet frame header.
  */
 
+#if __UAPI_DEF_ETHHDR
 struct ethhdr {
 	unsigned char	h_dest[ETH_ALEN];	/* destination eth addr	*/
 	unsigned char	h_source[ETH_ALEN];	/* source ether addr	*/
 	__be16		h_proto;		/* packet type ID field	*/
 } __attribute__((packed));
+#endif
 
 
 #endif /* _UAPI_LINUX_IF_ETHER_H */
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index 8254c937c9f4..fc29efaa918c 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -264,4 +264,10 @@
 
 #endif /* __GLIBC__ */
 
+/* Definitions for if_ether.h */
+/* allow libcs like musl to deactivate this, glibc does not implement this. */
+#ifndef __UAPI_DEF_ETHHDR
+#define __UAPI_DEF_ETHHDR		1
+#endif
+
 #endif /* _UAPI_LIBC_COMPAT_H */
-- 
2.11.0

^ permalink raw reply related

* Re: Linux ECN Handling
From: Steve Ibanez @ 2018-01-03 22:21 UTC (permalink / raw)
  To: Neal Cardwell
  Cc: Eric Dumazet, Yuchung Cheng, Daniel Borkmann, Netdev,
	Florian Westphal, Mohammad Alizadeh, Lawrence Brakmo
In-Reply-To: <CADVnQym9c1n-=KTGou0_WJv-68XYaKG7BUuss9eXYg9aca1ffQ@mail.gmail.com>

[-- Attachment #1: Type: text/plain, Size: 3169 bytes --]

Hi Neal,

I've attached a pdf of a slide that I made which shows some data from
the kernel log at the receiver as well as the timestamp, SeqNo, and
length of the corresponding segments from the tcpdump trace at the
receiver interface. Hopefully this helps clarify why I think
tcp_transmit_skb() is called at some point before tcp_ack_snd_check()
while processing the CWR segment for which no ACK is sent. Please let
me know if anything is unclear or if you know where that initial call
to tcp_transmit_skb() might be coming from and why it's happening
prematurely.

Best,
-Steve

On Wed, Jan 3, 2018 at 11:39 AM, Neal Cardwell <ncardwell@google.com> wrote:
> On Tue, Jan 2, 2018 at 6:57 PM, Steve Ibanez <sibanez@stanford.edu> wrote:
>> Hi Neal,
>>
>> Sorry, my last email was incorrect. It turns out the default tcp
>> congestion control alg that was being used on my client machines was
>> cubic instead of dctcp. That is why tp->processing_cwr field was never
>> set in the tcp_rcv_established function. I've changed the default back
>> to dctcp on all of my machines.
>>
>> I am now logging the value of tp->rcv_nxt at the top of the
>> tcp_transmit_skb() function for all CWR segments. I see that during
>> normal operation, the value of tp->rcv_nxt is equal to the SeqNo in
>> the CWR segment  + length of the CWR segment.
>
> OK, thanks. That makes sense.
>
> This part I didn't understand:
>
>> However, for the unACKed
>> CWR segment, the value of tp->rcv_nxt is just equal to the SeqNo in
>> the CWR segment (i.e. not incremented by the length). And I see that
>> by the time the tcp_ack_snd_check() function is executed, tp->rcv_nxt
>> has been incremented by the length of the unACKed CWR segment.
>
> I would have thought that for the processing of the skb that has the
> CWR, the sequence would be:
>
> (1)  "...the tcp_ack_snd_check() function is executed, tp->rcv_nxt has
> been incremented by the length of the unACKed CWR segment"
>
> (2) then we send the ACK, and the instrumentation at the top of the
> tcp_transmit_skb() function logs that rcv_nxt value (which "has been
> incremented by the length of the unACKed CWR segment").
>
> But you are saying "for the unACKed CWR segment, the value of
> tp->rcv_nxt is just equal to the SeqNo in the CWR segment (i.e. not
> incremented by the length)", which does not seem to match my
> prediction in (2). Apparently I am mis-understanding the sequence.
> Perhaps you can help clear it up for me? :-)
>
> Is it possible that the case where you see "tp->rcv_nxt is just equal
> to the SeqNo in the CWR segment" is a log line that was logged while
> processing the skb that precedes the skb with the CWR?
>
>> The tcp_transmit_skb() function sets the outgoing segment's ack_seq to
>> be tp->rcv_next:
>>
>> th->ack_seq             = htonl(tp->rcv_nxt);
>>
>> So I think the rcv_nxt field is supposed to be incremented before
>> reaching tcp_transmit_skb(). Can you see any reason as to why this
>> field would not be incremented for CWR segments sometimes?
>
> No, so far I haven't been able to think of a reason why rcv_nxt would
> not be incremented for in-order CWR-marked segments...
>
> cheers,
> neal

[-- Attachment #2: CWR_log_and_trace.pdf --]
[-- Type: application/pdf, Size: 34236 bytes --]

^ permalink raw reply

* [PATCH] net/mlx5e: hide an unused variable
From: Arnd Bergmann @ 2018-01-03 22:40 UTC (permalink / raw)
  To: Saeed Mahameed, Matan Barak, Leon Romanovsky
  Cc: Arnd Bergmann, Or Gerlitz, Hadar Hen Zion, David S. Miller,
	Paul Blakey, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA

The uplink_rpriv variable was added at the start of the function but
only used inside of an #ifdef:

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c: In function 'mlx5e_route_lookup_ipv6':
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:1549:25: error: unused variable 'uplink_rpriv' [-Werror=unused-variable]

This moves the declaration into that #ifdef as well.

Fixes: 5ed99fb421d4 ("net/mlx5e: Move ethernet representors data into separate struct")
Signed-off-by: Arnd Bergmann <arnd-r2nGTMty4D4@public.gmane.org>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 25a8073f15d8..933275fe03b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1546,11 +1546,11 @@ static int mlx5e_route_lookup_ipv6(struct mlx5e_priv *priv,
 				   struct neighbour **out_n,
 				   int *out_ttl)
 {
-	struct mlx5e_rep_priv *uplink_rpriv;
 	struct neighbour *n = NULL;
 	struct dst_entry *dst;
 
 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
+	struct mlx5e_rep_priv *uplink_rpriv;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int ret;
 
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH net-next v2 05/10] net: qualcomm: rmnet: Set pacing rate
From: Subash Abhinov Kasiviswanathan @ 2018-01-03 22:45 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: davem, netdev, lkp
In-Reply-To: <1515016918.131759.2.camel@gmail.com>

>> +	sk_pacing_shift_update(skb->sk, 8);
> 
> Well... Please tell us why this is needed in this driver.
> 
> This interface is meant for wifi aggregation, not to work around some
> strange ethernet drivers designs.

Hi Eric

The real device over which the rmnet devices are installed also
aggregate multiple IP packets and sends them as a single large aggregate
frame to the hardware.

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project

^ permalink raw reply

* [PATCH net-next 1/4] l2tp: revert "l2tp: add peer_offset parameter"
From: James Chapman @ 2018-01-03 22:48 UTC (permalink / raw)
  To: netdev; +Cc: g.nault, lorenzo.bianconi, liuhangbin, James Chapman
In-Reply-To: <1515019687-1556-1-git-send-email-jchapman@katalix.com>

Revert commit f15bc54eeecd ("l2tp: add peer_offset parameter"). This
is removed because it is adding another configurable offset and
configurable offsets are being removed.

Signed-off-by: James Chapman <jchapman@katalix.com>
---
 include/uapi/linux/l2tp.h |  1 -
 net/l2tp/l2tp_core.c      |  3 +--
 net/l2tp/l2tp_core.h      | 13 +++----------
 net/l2tp/l2tp_debugfs.c   |  8 +++-----
 net/l2tp/l2tp_netlink.c   | 21 +--------------------
 5 files changed, 8 insertions(+), 38 deletions(-)

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index d6fee55..d84ce5c 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -127,7 +127,6 @@ enum {
 	L2TP_ATTR_UDP_ZERO_CSUM6_TX,	/* flag */
 	L2TP_ATTR_UDP_ZERO_CSUM6_RX,	/* flag */
 	L2TP_ATTR_PAD,
-	L2TP_ATTR_PEER_OFFSET,		/* u16 */
 	__L2TP_ATTR_MAX,
 };
 
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6ff6471..115918a 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -792,7 +792,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			ptr += 2 + offset;
 		}
 	} else
-		ptr += session->peer_offset;
+		ptr += session->offset;
 
 	offset = ptr - optr;
 	if (!pskb_may_pull(skb, offset))
@@ -1785,7 +1785,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 			session->lns_mode = cfg->lns_mode;
 			session->reorder_timeout = cfg->reorder_timeout;
 			session->offset = cfg->offset;
-			session->peer_offset = cfg->peer_offset;
 			session->l2specific_type = cfg->l2specific_type;
 			session->l2specific_len = cfg->l2specific_len;
 			session->cookie_len = cfg->cookie_len;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index c6fe7cc..9534e16 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -59,8 +59,7 @@ struct l2tp_session_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
 	u16			vlan_id;	/* VLAN pseudowire only */
-	u16			offset;		/* offset to tx payload */
-	u16			peer_offset;	/* offset to rx payload */
+	u16			offset;		/* offset to payload */
 	u16			l2specific_len;	/* Layer 2 specific length */
 	u16			l2specific_type; /* Layer 2 specific type */
 	u8			cookie[8];	/* optional cookie */
@@ -87,14 +86,8 @@ struct l2tp_session {
 	int			cookie_len;
 	u8			peer_cookie[8];
 	int			peer_cookie_len;
-	u16			offset;		/* offset from end of L2TP
-						 * header to beginning of
-						 * tx data
-						 */
-	u16			peer_offset;	/* offset from end of L2TP
-						 * header to beginning of
-						 * rx data
-						 */
+	u16			offset;		/* offset from end of L2TP header
+						   to beginning of data */
 	u16			l2specific_len;
 	u16			l2specific_type;
 	u16			hdr_len;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index 4cc30b3..eb69411 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -180,9 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   offset %hu peer_offset %hu l2specific %hu/%hu\n",
-		   session->offset, session->peer_offset,
-		   session->l2specific_type, session->l2specific_len);
+	seq_printf(m, "   offset %hu l2specific %hu/%hu\n",
+		   session->offset, session->l2specific_type, session->l2specific_len);
 	if (session->cookie_len) {
 		seq_printf(m, "   cookie %02x%02x%02x%02x",
 			   session->cookie[0], session->cookie[1],
@@ -229,8 +228,7 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v)
 		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
 		seq_puts(m, "  SESSION ID, peer ID, PWTYPE\n");
 		seq_puts(m, "   refcnt cnt\n");
-		seq_puts(m, "   offset OFFSET peer_offset OFFSET");
-		seq_puts(m, " l2specific TYPE/LEN\n");
+		seq_puts(m, "   offset OFFSET l2specific TYPE/LEN\n");
 		seq_puts(m, "   [ cookie ]\n");
 		seq_puts(m, "   [ peer cookie ]\n");
 		seq_puts(m, "   config mtu/mru/rcvseq/sendseq/dataseq/lns debug reorderto\n");
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index d7d4d7a..7e9c501 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -547,25 +547,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	}
 
 	if (tunnel->version > 2) {
-		if (info->attrs[L2TP_ATTR_PEER_OFFSET]) {
-			struct nlattr *peer_offset;
-
-			peer_offset = info->attrs[L2TP_ATTR_PEER_OFFSET];
-			cfg.peer_offset = nla_get_u16(peer_offset);
-		}
-
-		if (info->attrs[L2TP_ATTR_OFFSET]) {
+		if (info->attrs[L2TP_ATTR_OFFSET])
 			cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
 
-			/* in order to maintain compatibility with older
-			 * versions where offset was used for both tx and
-			 * rx side, update rx side with offset if peer_offset
-			 * is not provided by userspace
-			 */
-			if (!info->attrs[L2TP_ATTR_PEER_OFFSET])
-				cfg.peer_offset = cfg.offset;
-		}
-
 		if (info->attrs[L2TP_ATTR_DATA_SEQ])
 			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
 
@@ -779,8 +763,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 	     nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
 	    (session->offset &&
 	     nla_put_u16(skb, L2TP_ATTR_OFFSET, session->offset)) ||
-	    (session->peer_offset &&
-	     nla_put_u16(skb, L2TP_ATTR_PEER_OFFSET, session->peer_offset)) ||
 	    (session->cookie_len &&
 	     nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
 		     &session->cookie[0])) ||
@@ -921,7 +903,6 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback
 	[L2TP_ATTR_PW_TYPE]		= { .type = NLA_U16, },
 	[L2TP_ATTR_ENCAP_TYPE]		= { .type = NLA_U16, },
 	[L2TP_ATTR_OFFSET]		= { .type = NLA_U16, },
-	[L2TP_ATTR_PEER_OFFSET]		= { .type = NLA_U16, },
 	[L2TP_ATTR_DATA_SEQ]		= { .type = NLA_U8, },
 	[L2TP_ATTR_L2SPEC_TYPE]		= { .type = NLA_U8, },
 	[L2TP_ATTR_L2SPEC_LEN]		= { .type = NLA_U8, },
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 3/4] l2tp: remove configurable payload offset
From: James Chapman @ 2018-01-03 22:48 UTC (permalink / raw)
  To: netdev; +Cc: g.nault, lorenzo.bianconi, liuhangbin, James Chapman
In-Reply-To: <1515019687-1556-1-git-send-email-jchapman@katalix.com>

If L2TP_ATTR_OFFSET is set to a non-zero value in L2TPv3 tunnels, it
results in L2TPv3 packets being transmitted which might not be
compliant with the L2TPv3 RFC. This patch has l2tp ignore the offset
setting and send all packets with no offset.

In more detail:

L2TPv2 supports a variable offset from the L2TPv2 header to the
payload. The offset value is indicated by an optional field in the
L2TP header.  Our L2TP implementation already detects the presence of
the optional offset and skips that many bytes when handling data
received packets. All transmitted packets are always transmitted with
no offset.

L2TPv3 has no optional offset field in the L2TPv3 packet
header. Instead, L2TPv3 defines optional fields in a "Layer-2 Specific
Sublayer". At the time when the original L2TP code was written, there
was talk at IETF of offset being implemented in a new Layer-2 Specific
Sublayer. A L2TP_ATTR_OFFSET netlink attribute was added so that this
offset could be configured and the intention was to allow it to be
also used to set the tx offset for L2TPv2. However, no L2TPv3 offset
was ever specified and the L2TP_ATTR_OFFSET parameter was forgotten
about.

Setting L2TP_ATTR_OFFSET results in L2TPv3 packets being transmitted
with the specified number of bytes padding between L2TPv3 header and
payload. This is not compliant with L2TPv3 RFC3931. This change
removes the configurable offset altogether while retaining
L2TP_ATTR_OFFSET for backwards compatibility. Any L2TP_ATTR_OFFSET
value is ignored.

Signed-off-by: James Chapman <jchapman@katalix.com>
---
 net/l2tp/l2tp_core.c    | 14 ++++----------
 net/l2tp/l2tp_core.h    |  3 ---
 net/l2tp/l2tp_debugfs.c |  4 ++--
 net/l2tp/l2tp_netlink.c |  3 ---
 4 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 115918a..786cd7f 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -780,10 +780,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 		}
 	}
 
-	/* Session data offset is handled differently for L2TPv2 and
-	 * L2TPv3. For L2TPv2, there is an optional 16-bit value in
-	 * the header. For L2TPv3, the offset is negotiated using AVPs
-	 * in the session setup control protocol.
+	/* Session data offset is defined only for L2TPv2 and is
+	 * indicated by an optional 16-bit value in the header.
 	 */
 	if (tunnel->version == L2TP_HDR_VER_2) {
 		/* If offset bit set, skip it. */
@@ -791,8 +789,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
 			offset = ntohs(*(__be16 *)ptr);
 			ptr += 2 + offset;
 		}
-	} else
-		ptr += session->offset;
+	}
 
 	offset = ptr - optr;
 	if (!pskb_may_pull(skb, offset))
@@ -1068,8 +1065,6 @@ static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 		}
 		bufp += session->l2specific_len;
 	}
-	if (session->offset)
-		bufp += session->offset;
 
 	return bufp - optr;
 }
@@ -1734,7 +1729,7 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
 		if (session->send_seq)
 			session->hdr_len += 4;
 	} else {
-		session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+		session->hdr_len = 4 + session->cookie_len + session->l2specific_len;
 		if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
 			session->hdr_len += 4;
 	}
@@ -1784,7 +1779,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
 			session->recv_seq = cfg->recv_seq;
 			session->lns_mode = cfg->lns_mode;
 			session->reorder_timeout = cfg->reorder_timeout;
-			session->offset = cfg->offset;
 			session->l2specific_type = cfg->l2specific_type;
 			session->l2specific_len = cfg->l2specific_len;
 			session->cookie_len = cfg->cookie_len;
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 9534e16..c2e9bbd 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -59,7 +59,6 @@ struct l2tp_session_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
 	u16			vlan_id;	/* VLAN pseudowire only */
-	u16			offset;		/* offset to payload */
 	u16			l2specific_len;	/* Layer 2 specific length */
 	u16			l2specific_type; /* Layer 2 specific type */
 	u8			cookie[8];	/* optional cookie */
@@ -86,8 +85,6 @@ struct l2tp_session {
 	int			cookie_len;
 	u8			peer_cookie[8];
 	int			peer_cookie_len;
-	u16			offset;		/* offset from end of L2TP header
-						   to beginning of data */
 	u16			l2specific_len;
 	u16			l2specific_type;
 	u16			hdr_len;
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index eb69411..2c30587 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -180,8 +180,8 @@ static void l2tp_dfs_seq_session_show(struct seq_file *m, void *v)
 		   session->lns_mode ? "LNS" : "LAC",
 		   session->debug,
 		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   offset %hu l2specific %hu/%hu\n",
-		   session->offset, session->l2specific_type, session->l2specific_len);
+	seq_printf(m, "   offset 0 l2specific %hu/%hu\n",
+		   session->l2specific_type, session->l2specific_len);
 	if (session->cookie_len) {
 		seq_printf(m, "   cookie %02x%02x%02x%02x",
 			   session->cookie[0], session->cookie[1],
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index a1f24fb..e1ca29f 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -547,9 +547,6 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
 	}
 
 	if (tunnel->version > 2) {
-		if (info->attrs[L2TP_ATTR_OFFSET])
-			cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
-
 		if (info->attrs[L2TP_ATTR_DATA_SEQ])
 			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
 
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 2/4] l2tp: revert "l2tp: fix missing print session offset info"
From: James Chapman @ 2018-01-03 22:48 UTC (permalink / raw)
  To: netdev; +Cc: g.nault, lorenzo.bianconi, liuhangbin, James Chapman
In-Reply-To: <1515019687-1556-1-git-send-email-jchapman@katalix.com>

Revert commit 820da5357572 ("l2tp: fix missing print session offset
info").  The peer_offset parameter is removed.

Signed-off-by: James Chapman <jchapman@katalix.com>
---
 net/l2tp/l2tp_netlink.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 7e9c501..a1f24fb 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -761,8 +761,6 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
 
 	if ((session->ifname[0] &&
 	     nla_put_string(skb, L2TP_ATTR_IFNAME, session->ifname)) ||
-	    (session->offset &&
-	     nla_put_u16(skb, L2TP_ATTR_OFFSET, session->offset)) ||
 	    (session->cookie_len &&
 	     nla_put(skb, L2TP_ATTR_COOKIE, session->cookie_len,
 		     &session->cookie[0])) ||
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 0/4] l2tp: remove configurable offset parameters
From: James Chapman @ 2018-01-03 22:48 UTC (permalink / raw)
  To: netdev; +Cc: g.nault, lorenzo.bianconi, liuhangbin, James Chapman

This patch series removes all code to support a configurable offset in
transmitted l2tp packets. Code to handle this is incomplete and buggy
and has been this way for years. If anyone tried to configure an
offset, it would be ignored for L2TPv2 tunnels, or for L2TPv3 tunnels,
could result in L2TPv3 packets being transmitted which are not
compliant with L2TPv3 RFC3931. This patch series removes the support
for configurable offsets.

No known userspace l2tp daemon configures an offset. However,
iproute2's "ip l2tp" command has an offset parameter and if set, the
value is passed to the kernel. This is the most likely use case where
offsets might be configured, e.g.

   ip l2tp add tunnel local 1.1.1.1 remote 1.1.1.2 tunnel_id 1 \
       peer_tunnel_id 2 encap ip
   ip l2tp add session name l2tp0 tunnel_id 1 session_id 1 \
       peer_session_id 2 offset 8

The above would result in packets being transmitted to 1.1.1.2 with 8
bytes padding between the L2TPv3 header and the payload. The peer
would need to be configured with the same offset value. However, the
packets are not compliant with the L2TPv3 RFC, hence I think it's
unlikely that offset is being used. With this patch series applied,
the offset would not be configured. The peer would need to be modified to
remove its offset setting too.

iproute2 should be modified to remove or ignore the ip l2tp offset
parameter.

This issue was discovered when reviewing a patch series from
lorenzo.bianconi@redhat.com which adds another netlink attribute to
configure the expected offset in received L2TPv3 packets. This change
is reverted by this series because offsets do not exist in L2TPv3
packets. These commits are:

  commit f15bc54eeecd ("l2tp: add peer_offset parameter")
  commit 820da5357572 ("l2tp: fix missing print session offset info")

In more detail:

The L2TPv2 protocol supports a variable offset from the L2TPv2 header
to the payload to give the sender implementation some flexibility for
data alignment when adding L2TP headers on to payloads. The offset
value is indicated by an optional field in the L2TP header.  Our L2TP
implementation already detects the presence of the optional offset in
received packets and skips those bytes when parsing packets. All
transmitted L2TPv2 packets are always transmitted with no offset.

L2TPv3 has no optional offset field in the L2TPv3 packet
header. Instead, L2TPv3 defines optional fields in a "Layer-2 Specific
Sublayer". At the time when the original L2TP code was written, there
was talk at IETF of offset being implemented in a new Layer-2 Specific
Sublayer. A L2TP_ATTR_OFFSET netlink attribute was added so that this
offset could be configured and the intention was to allow it to be
also used to set the tx offset for L2TPv2. However, no L2TPv3 offset
was ever specified and the L2TP_ATTR_OFFSET parameter was forgotten
about.

Setting L2TP_ATTR_OFFSET results in L2TPv3 packets being transmitted
with the specified number of bytes padding between L2TPv3 header and
payload. This is not compliant with L2TPv3 RFC3931. So this change
removes the configurable offset altogether while retaining
L2TP_ATTR_OFFSET in the API for backwards compatibility. If
L2TP_ATTR_OFFSET is given, its value is now silently ignored.

James Chapman (4):
  l2tp: revert "l2tp: add peer_offset parameter"
  l2tp: revert "l2tp: fix missing print session offset info"
  l2tp: remove configurable payload offset
  l2tp: add comment in API header that L2TP_ATTR_OFFSET is not used

 include/uapi/linux/l2tp.h |  3 +--
 net/l2tp/l2tp_core.c      | 15 ++++-----------
 net/l2tp/l2tp_core.h      | 10 ----------
 net/l2tp/l2tp_debugfs.c   |  6 ++----
 net/l2tp/l2tp_netlink.c   | 24 ------------------------
 5 files changed, 7 insertions(+), 51 deletions(-)

-- 
1.9.1

^ permalink raw reply

* [PATCH net-next 4/4] l2tp: add comment in API header that L2TP_ATTR_OFFSET is not used
From: James Chapman @ 2018-01-03 22:48 UTC (permalink / raw)
  To: netdev; +Cc: g.nault, lorenzo.bianconi, liuhangbin, James Chapman
In-Reply-To: <1515019687-1556-1-git-send-email-jchapman@katalix.com>

Signed-off-by: James Chapman <jchapman@katalix.com>
---
 include/uapi/linux/l2tp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index d84ce5c..f78eef4 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -94,7 +94,7 @@ enum {
 	L2TP_ATTR_NONE,			/* no data */
 	L2TP_ATTR_PW_TYPE,		/* u16, enum l2tp_pwtype */
 	L2TP_ATTR_ENCAP_TYPE,		/* u16, enum l2tp_encap_type */
-	L2TP_ATTR_OFFSET,		/* u16 */
+	L2TP_ATTR_OFFSET,		/* u16 (not used) */
 	L2TP_ATTR_DATA_SEQ,		/* u16 */
 	L2TP_ATTR_L2SPEC_TYPE,		/* u8, enum l2tp_l2spec_type */
 	L2TP_ATTR_L2SPEC_LEN,		/* u8, enum l2tp_l2spec_type */
-- 
1.9.1

^ permalink raw reply related

* [PATCH net v2 0/2] SCTP PMTU discovery fixes
From: Marcelo Ricardo Leitner @ 2018-01-03 22:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Xin Long, Vlad Yasevich, Neil Horman

This patchset fixes 2 issues with PMTU discovery that can lead to flood
of retransmissions.
The first patch fixes the issue for when PMTUD is disabled by the
application, while the second fixes it for when its enabled.

Please consider these to stable.

Thanks,

Marcelo Ricardo Leitner (2):
  sctp: do not retransmit upon FragNeeded if PMTU discovery is disabled
  sctp: fix the handling of ICMP Frag Needed for too small MTUs

 include/net/sctp/structs.h |  2 +-
 net/sctp/input.c           | 21 +++++++++++++++------
 net/sctp/transport.c       | 29 +++++++++++++++++++----------
 3 files changed, 35 insertions(+), 17 deletions(-)

-- 
2.14.3

^ permalink raw reply

* [PATCH net v2 1/2] sctp: do not retransmit upon FragNeeded if PMTU discovery is disabled
From: Marcelo Ricardo Leitner @ 2018-01-03 22:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Xin Long, Vlad Yasevich, Neil Horman
In-Reply-To: <cover.1515020059.git.marcelo.leitner@gmail.com>

Currently, if PMTU discovery is disabled on a given transport, but the
configured value is higher than the actual PMTU, it is likely that we
will get some icmp Frag Needed. The issue is, if PMTU discovery is
disabled, we won't update the information and will issue a
retransmission immediately, which may very well trigger another ICMP,
and another retransmission, leading to a loop.

The fix is to simply not trigger immediate retransmissions if PMTU
discovery is disabled on the given transport.

Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 net/sctp/input.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/net/sctp/input.c b/net/sctp/input.c
index 621b5ca3fd1c17c3d7ef7bb1c7677ab98cebbe77..4a8e76f4834c90de9398455862423e598b8354a7 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -399,13 +399,18 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 		return;
 	}
 
-	if (t->param_flags & SPP_PMTUD_ENABLE) {
-		/* Update transports view of the MTU */
-		sctp_transport_update_pmtu(t, pmtu);
+	if (!(t->param_flags & SPP_PMTUD_ENABLE))
+		/* We can't allow retransmitting in such case, as the
+		 * retransmission would be sized just as before, and thus we
+		 * would get another icmp, and retransmit again.
+		 */
+		return;
 
-		/* Update association pmtu. */
-		sctp_assoc_sync_pmtu(asoc);
-	}
+	/* Update transports view of the MTU */
+	sctp_transport_update_pmtu(t, pmtu);
+
+	/* Update association pmtu. */
+	sctp_assoc_sync_pmtu(asoc);
 
 	/* Retransmit with the new pmtu setting.
 	 * Normally, if PMTU discovery is disabled, an ICMP Fragmentation
-- 
2.14.3

^ permalink raw reply related

* [PATCH net v2 2/2] sctp: fix the handling of ICMP Frag Needed for too small MTUs
From: Marcelo Ricardo Leitner @ 2018-01-03 22:59 UTC (permalink / raw)
  To: netdev; +Cc: linux-sctp, Xin Long, Vlad Yasevich, Neil Horman
In-Reply-To: <cover.1515020059.git.marcelo.leitner@gmail.com>

syzbot reported a hang involving SCTP, on which it kept flooding dmesg
with the message:
[  246.742374] sctp: sctp_transport_update_pmtu: Reported pmtu 508 too
low, using default minimum of 512

That happened because whenever SCTP hits an ICMP Frag Needed, it tries
to adjust to the new MTU and triggers an immediate retransmission. But
it didn't consider the fact that MTUs smaller than the SCTP minimum MTU
allowed (512) would not cause the PMTU to change, and issued the
retransmission anyway (thus leading to another ICMP Frag Needed, and so
on).

As IPv4 (ip_rt_min_pmtu=556) and IPv6 (IPV6_MIN_MTU=1280) minimum MTU
are higher than that, sctp_transport_update_pmtu() is changed to
re-fetch the PMTU that got set after our request, and with that, detect
if there was an actual change or not.

The fix, thus, skips the immediate retransmission if the received ICMP
resulted in no change, in the hope that SCTP will select another path.

Note: The value being used for the minimum MTU (512,
SCTP_DEFAULT_MINSEGMENT) is not right and instead it should be (576,
SCTP_MIN_PMTU), but such change belongs to another patch.

Changes from v1:
- do not disable PMTU discovery, in the light of commit
06ad391919b2 ("[SCTP] Don't disable PMTU discovery when mtu is small")
and as suggested by Xin Long.
- changed the way to break the rtx loop by detecting if the icmp
  resulted in a change or not.
- change the warn to use pr_warn_ratelimited, to protect in case there
  are several transports using such path.

See-also: https://lkml.org/lkml/2017/12/22/811
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 include/net/sctp/structs.h |  2 +-
 net/sctp/input.c           |  8 ++++++--
 net/sctp/transport.c       | 29 +++++++++++++++++++----------
 3 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 2f8f93da5dc2660f4db37c04f8a434809b3120a1..9a5ccf03a59b1e0a4820e2a978c66f1df8a9a82f 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -966,7 +966,7 @@ void sctp_transport_burst_limited(struct sctp_transport *);
 void sctp_transport_burst_reset(struct sctp_transport *);
 unsigned long sctp_transport_timeout(struct sctp_transport *);
 void sctp_transport_reset(struct sctp_transport *t);
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu);
 void sctp_transport_immediate_rtx(struct sctp_transport *);
 void sctp_transport_dst_release(struct sctp_transport *t);
 void sctp_transport_dst_confirm(struct sctp_transport *t);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 4a8e76f4834c90de9398455862423e598b8354a7..3cddbecabaa2db410971c66d3256c466c7f5289f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -406,8 +406,12 @@ void sctp_icmp_frag_needed(struct sock *sk, struct sctp_association *asoc,
 		 */
 		return;
 
-	/* Update transports view of the MTU */
-	sctp_transport_update_pmtu(t, pmtu);
+	/* Update transports view of the MTU. Return if no update was needed.
+	 * If an update wasn't needed/possible, it also doesn't make sense to
+	 * try to retransmit now.
+	 */
+	if (!sctp_transport_update_pmtu(t, pmtu))
+		return;
 
 	/* Update association pmtu. */
 	sctp_assoc_sync_pmtu(asoc);
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 1e5a22430cf56e40a6f323081beb97836b506384..47f82bd794d915188bad037463c2aa14175a55ef 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -248,28 +248,37 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk)
 		transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT;
 }
 
-void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
+bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu)
 {
 	struct dst_entry *dst = sctp_transport_dst_check(t);
+	bool change = true;
 
 	if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) {
-		pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n",
-			__func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
-		/* Use default minimum segment size and disable
-		 * pmtu discovery on this transport.
-		 */
-		t->pathmtu = SCTP_DEFAULT_MINSEGMENT;
-	} else {
-		t->pathmtu = pmtu;
+		pr_warn_ratelimited("%s: Reported pmtu %d too low, using default minimum of %d\n",
+				    __func__, pmtu, SCTP_DEFAULT_MINSEGMENT);
+		/* Use default minimum segment instead */
+		pmtu = SCTP_DEFAULT_MINSEGMENT;
 	}
+	pmtu = SCTP_TRUNC4(pmtu);
 
 	if (dst) {
 		dst->ops->update_pmtu(dst, t->asoc->base.sk, NULL, pmtu);
 		dst = sctp_transport_dst_check(t);
 	}
 
-	if (!dst)
+	if (!dst) {
 		t->af_specific->get_dst(t, &t->saddr, &t->fl, t->asoc->base.sk);
+		dst = t->dst;
+	}
+
+	if (dst) {
+		/* Re-fetch, as under layers may have a higher minimum size */
+		pmtu = SCTP_TRUNC4(dst_mtu(dst));
+		change = t->pathmtu != pmtu;
+	}
+	t->pathmtu = pmtu;
+
+	return change;
 }
 
 /* Caches the dst entry and source address for a transport's destination
-- 
2.14.3

^ permalink raw reply related

* Re: [RFC PATCH net-next 03/19] ipv6: Clear nexthop flags upon netdev up
From: David Ahern @ 2018-01-03 23:08 UTC (permalink / raw)
  To: Ido Schimmel; +Cc: Ido Schimmel, netdev, davem, roopa, nicolas.dichtel, mlxsw
In-Reply-To: <20180103205321.GA13248@splinter>

On 1/3/18 1:53 PM, Ido Schimmel wrote:
> On Wed, Jan 03, 2018 at 11:47:16AM -0700, David Ahern wrote:
>> On 1/3/18 10:40 AM, Ido Schimmel wrote:
>>> David, can we please get back to the issue at hand? What's the problem
>>> with the location of the call to rt6_sync_up()?
>>
>> My original comment was asking why do it on NETDEV_CHANGE when it should
>> only be needed on NETDEV_UP.
> 
> I can condition the call to rt6_sync_up() on the event being NETDEV_UP,
> but the location needs to stay the same. Before that the interface still
> doesn't have an IP address.

it's fine as is. The NETDEV_CHANGE section has a break after calling
rt6_sync_up(RTNH_F_LINKDOWN) which is added later. To your point,
addrconf_dev_config will add a linklocal address.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox