Netdev List
 help / color / mirror / Atom feed
* [PATCH RFC 2/5] net:stmmac: fix rx buffer allocation.
From: Jimmy Perchet @ 2013-10-16 15:24 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev, Jimmy Perchet
In-Reply-To: <1381937052-8999-1-git-send-email-jimmy.perchet@parrot.com>

Rx buffers used wrong size, because priv->dma_buf_sz was updated after allocation.

Signed-off-by: Jimmy Perchet <jimmy.perchet@parrot.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 170f043..0015175 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -998,6 +998,9 @@ static int init_dma_desc_rings(struct net_device *dev)
 	if (bfsize < BUF_SIZE_16KiB)
 		bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
 
+	priv->dma_buf_sz = bfsize;
+	buf_sz = bfsize;
+
 	if (netif_msg_probe(priv))
 		pr_debug("%s: txsize %d, rxsize %d, bfsize %d\n", __func__,
 			 txsize, rxsize, bfsize);
@@ -1087,8 +1090,6 @@ static int init_dma_desc_rings(struct net_device *dev)
 	}
 	priv->cur_rx = 0;
 	priv->dirty_rx = (unsigned int)(i - rxsize);
-	priv->dma_buf_sz = bfsize;
-	buf_sz = bfsize;
 
 	/* Setup the chained descriptor addresses */
 	if (priv->mode == STMMAC_CHAIN_MODE) {
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH RFC 3/5] net:stmmac: ensure we reclaim all dirty descriptors.
From: Jimmy Perchet @ 2013-10-16 15:24 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev, Jimmy Perchet
In-Reply-To: <1381937052-8999-1-git-send-email-jimmy.perchet@parrot.com>

On low speed link (10MBit/s), some TX descriptors can remain dirty
if the tx coalescence timer expires before they were treated. Re-arm timer
in this case.

Signed-off-by: Jimmy Perchet <jimmy.perchet@parrot.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0015175..af04b5d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1284,8 +1284,12 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 			p = priv->dma_tx + entry;
 
 		/* Check if the descriptor is owned by the DMA. */
-		if (priv->hw->desc->get_tx_owner(p))
+		if (priv->hw->desc->get_tx_owner(p)) {
+			/* Be sure to harvest remaining descriptor. */
+			mod_timer(&priv->txtimer,
+			  STMMAC_COAL_TIMER(priv->tx_coal_timer));
 			break;
+		}
 
 		/* Verify tx error by looking at the last segment. */
 		last = priv->hw->desc->get_tx_ls(p);
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH RFC 4/5] net:stmmac: fix jumbo frame handling.
From: Jimmy Perchet @ 2013-10-16 15:24 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev, Jimmy Perchet
In-Reply-To: <1381937052-8999-1-git-send-email-jimmy.perchet@parrot.com>

This patch addresses several issues which prevent jumbo frames from working properly :
.jumbo frames' last descriptor was not closed
.several confusion regarding descriptor's max buffer size
.frags could not be jumbo

Signed-off-by: Jimmy Perchet <jimmy.perchet@parrot.com>
---
 drivers/net/ethernet/stmicro/stmmac/chain_mode.c  |  95 ++++++++----------
 drivers/net/ethernet/stmicro/stmmac/common.h      |   6 ++
 drivers/net/ethernet/stmicro/stmmac/descs_com.h   |   8 +-
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c    |   6 ++
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c   |   6 ++
 drivers/net/ethernet/stmicro/stmmac/ring_mode.c   |  90 ++++++++---------
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 112 +++++++++++-----------
 7 files changed, 158 insertions(+), 165 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index d234ab5..d6ed0ce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -28,70 +28,58 @@
 
 #include "stmmac.h"
 
-static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static unsigned int stmmac_prepare_frm(void *p, void *data, unsigned int len,
+						 int csum, unsigned int entry)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)p;
-	unsigned int txsize = priv->dma_tx_size;
-	unsigned int entry = priv->cur_tx % txsize;
-	struct dma_desc *desc = priv->dma_tx + entry;
-	unsigned int nopaged_len = skb_headlen(skb);
+	unsigned int entry_count = 0;
+	struct dma_desc *desc;
 	unsigned int bmax;
-	unsigned int i = 1, len;
 
-	if (priv->plat->enh_desc)
-		bmax = BUF_SIZE_8KiB;
-	else
-		bmax = BUF_SIZE_2KiB;
-
-	len = nopaged_len - bmax;
-
-	desc->des2 = dma_map_single(priv->device, skb->data,
-				    bmax, DMA_TO_DEVICE);
-	priv->tx_skbuff_dma[entry] = desc->des2;
-	priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE);
-
-	while (len != 0) {
-		entry = (++priv->cur_tx) % txsize;
+	if (priv->plat->enh_desc) {
+		desc = (struct dma_desc *)(priv->dma_etx + entry);
+		bmax = BUF_SIZE_8KiB - 1;
+	} else{
 		desc = priv->dma_tx + entry;
-
-		if (len > bmax) {
-			desc->des2 = dma_map_single(priv->device,
-						    (skb->data + bmax * i),
-						    bmax, DMA_TO_DEVICE);
-			priv->tx_skbuff_dma[entry] = desc->des2;
-			priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
-							STMMAC_CHAIN_MODE);
-			priv->hw->desc->set_tx_owner(desc);
-			priv->tx_skbuff[entry] = NULL;
-			len -= bmax;
-			i++;
-		} else {
-			desc->des2 = dma_map_single(priv->device,
-						    (skb->data + bmax * i), len,
-						    DMA_TO_DEVICE);
-			priv->tx_skbuff_dma[entry] = desc->des2;
-			priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
-							STMMAC_CHAIN_MODE);
-			priv->hw->desc->set_tx_owner(desc);
-			priv->tx_skbuff[entry] = NULL;
-			len = 0;
-		}
+		bmax = BUF_SIZE_2KiB - 1;
 	}
-	return entry;
-}
 
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
-{
-	unsigned int ret = 0;
+	while (len > bmax) {
+		desc->des2 = dma_map_single(priv->device,
+					    data + bmax*entry_count,
+					    bmax, DMA_TO_DEVICE);
+		priv->tx_skbuff_dma[entry] = desc->des2;
+		priv->tx_skbuff[entry] = NULL;
+		priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
+						STMMAC_CHAIN_MODE);
+
+		len -= bmax;
+		entry++;
+		entry %= priv->dma_tx_size;
+		entry_count++;
+
+		if (priv->plat->enh_desc)
+			desc = (struct dma_desc *)
+					(((struct dma_extended_desc *)desc)+1);
+		else
+			desc++;
+	}
 
-	if ((enh_desc && (len > BUF_SIZE_8KiB)) ||
-	    (!enh_desc && (len > BUF_SIZE_2KiB))) {
-		ret = 1;
+	if (len)	{
+		desc->des2 = dma_map_single(priv->device,
+					    data + bmax*entry_count,
+					    len, DMA_TO_DEVICE);
+		priv->tx_skbuff_dma[entry] = desc->des2;
+		priv->tx_skbuff[entry] = NULL;
+		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
+						STMMAC_CHAIN_MODE);
+		entry_count++;
 	}
 
-	return ret;
+	return entry_count;
 }
 
+
 static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
 				  unsigned int size, unsigned int extend_desc)
 {
@@ -154,8 +142,7 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 
 const struct stmmac_chain_mode_ops chain_mode_ops = {
 	.init = stmmac_init_dma_chain,
-	.is_jumbo_frm = stmmac_is_jumbo_frm,
-	.jumbo_frm = stmmac_jumbo_frm,
+	.prepare_frm = stmmac_prepare_frm,
 	.refill_desc3 = stmmac_refill_desc3,
 	.clean_desc3 = stmmac_clean_desc3,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 7eb8bab..5d3f734 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -323,6 +323,8 @@ struct stmmac_desc_ops {
 	/* Handle extra events on specific interrupts hw dependent */
 	int (*get_rx_owner) (struct dma_desc *p);
 	void (*set_rx_owner) (struct dma_desc *p);
+
+	void (*set_tx_first) (struct dma_desc *p);
 	/* Get the receive frame size */
 	int (*get_rx_frame_len) (struct dma_desc *p, int rx_coe_type);
 	/* Return the reception status looking at the RDES1 */
@@ -421,6 +423,8 @@ struct mii_regs {
 struct stmmac_ring_mode_ops {
 	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
 	unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum);
+	unsigned int (*prepare_frm) (void *p, void *data, unsigned int len,
+						int csum, unsigned int entry);
 	void (*refill_desc3) (void *priv, struct dma_desc *p);
 	void (*init_desc3) (struct dma_desc *p);
 	void (*clean_desc3) (void *priv, struct dma_desc *p);
@@ -432,6 +436,8 @@ struct stmmac_chain_mode_ops {
 		      unsigned int extend_desc);
 	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
 	unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum);
+	unsigned int (*prepare_frm) (void *p, void *data, unsigned int len,
+					 int csum, unsigned int entry);
 	void (*refill_desc3) (void *priv, struct dma_desc *p);
 	void (*clean_desc3) (void *priv, struct dma_desc *p);
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index 6f2cc78..cf199d6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -53,9 +53,9 @@ static inline void enh_desc_end_tx_desc_on_ring(struct dma_desc *p, int ter)
 
 static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
-	if (unlikely(len > BUF_SIZE_4KiB)) {
-		p->des01.etx.buffer1_size = BUF_SIZE_4KiB;
-		p->des01.etx.buffer2_size = len - BUF_SIZE_4KiB;
+	if (unlikely(len >= BUF_SIZE_8KiB)) {
+		p->des01.etx.buffer1_size = BUF_SIZE_8KiB - 1;
+		p->des01.etx.buffer2_size = len - p->des01.etx.buffer1_size;
 	} else
 		p->des01.etx.buffer1_size = len;
 }
@@ -81,7 +81,7 @@ static inline void ndesc_end_tx_desc_on_ring(struct dma_desc *p, int ter)
 
 static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
-	if (unlikely(len > BUF_SIZE_2KiB)) {
+	if (unlikely(len >= BUF_SIZE_2KiB)) {
 		p->des01.etx.buffer1_size = BUF_SIZE_2KiB - 1;
 		p->des01.etx.buffer2_size = len - p->des01.etx.buffer1_size;
 	} else
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 7e6628a..915a7ab 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -297,6 +297,11 @@ static void enh_desc_release_tx_desc(struct dma_desc *p, int mode)
 		enh_desc_end_tx_desc_on_ring(p, ter);
 }
 
+static void enh_desc_set_tx_first(struct dma_desc *p)
+{
+	p->des01.etx.first_segment = 1;
+}
+
 static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 				     int csum_flag, int mode)
 {
@@ -393,6 +398,7 @@ const struct stmmac_desc_ops enh_desc_ops = {
 	.get_tx_ls = enh_desc_get_tx_ls,
 	.set_tx_owner = enh_desc_set_tx_owner,
 	.set_rx_owner = enh_desc_set_rx_owner,
+	.set_tx_first = enh_desc_set_tx_first,
 	.get_rx_frame_len = enh_desc_get_rx_frame_len,
 	.rx_extended_status = enh_desc_get_ext_status,
 	.enable_tx_timestamp = enh_desc_enable_tx_timestamp,
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 35ad4f4..6363776 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -180,6 +180,11 @@ static void ndesc_release_tx_desc(struct dma_desc *p, int mode)
 		ndesc_end_tx_desc_on_ring(p, ter);
 }
 
+static void ndesc_set_tx_first(struct dma_desc *p)
+{
+	p->des01.etx.first_segment = 1;
+}
+
 static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 				  int csum_flag, int mode)
 {
@@ -265,6 +270,7 @@ const struct stmmac_desc_ops ndesc_ops = {
 	.get_tx_ls = ndesc_get_tx_ls,
 	.set_tx_owner = ndesc_set_tx_owner,
 	.set_rx_owner = ndesc_set_rx_owner,
+	.set_tx_first = ndesc_set_tx_first,
 	.get_rx_frame_len = ndesc_get_rx_frame_len,
 	.enable_tx_timestamp = ndesc_enable_tx_timestamp,
 	.get_tx_timestamp_status = ndesc_get_tx_timestamp_status,
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 1ef9d8a..7faa42a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -28,73 +28,60 @@
 
 #include "stmmac.h"
 
-static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+
+static unsigned int stmmac_prepare_frm(void *p, void *data, unsigned int len,
+						 int csum, unsigned int entry)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)p;
-	unsigned int txsize = priv->dma_tx_size;
-	unsigned int entry = priv->cur_tx % txsize;
+	unsigned int entry_count = 0;
 	struct dma_desc *desc;
-	unsigned int nopaged_len = skb_headlen(skb);
-	unsigned int bmax, len;
+	unsigned int bmax;
 
-	if (priv->extend_desc)
+	if (priv->plat->enh_desc) {
 		desc = (struct dma_desc *)(priv->dma_etx + entry);
-	else
+		bmax = BUF_SIZE_8KiB - 1;
+	} else{
 		desc = priv->dma_tx + entry;
+		bmax = BUF_SIZE_2KiB - 1;
+	}
 
-	if (priv->plat->enh_desc)
-		bmax = BUF_SIZE_8KiB;
-	else
-		bmax = BUF_SIZE_2KiB;
-
-	len = nopaged_len - bmax;
-
-	if (nopaged_len > BUF_SIZE_8KiB) {
-
-		desc->des2 = dma_map_single(priv->device, skb->data,
-					    bmax, DMA_TO_DEVICE);
+	while (len > 2*bmax) {
+		desc->des2 = dma_map_single(priv->device,
+						data + 2*bmax*entry_count,
+						2*bmax, DMA_TO_DEVICE);
 		priv->tx_skbuff_dma[entry] = desc->des2;
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
+		priv->tx_skbuff[entry] = NULL;
+		desc->des3 = desc->des2 + bmax;
+		priv->hw->desc->prepare_tx_desc(desc, 0, 2*bmax, csum,
 						STMMAC_RING_MODE);
-		wmb();
-		entry = (++priv->cur_tx) % txsize;
 
-		if (priv->extend_desc)
-			desc = (struct dma_desc *)(priv->dma_etx + entry);
+		len -= 2*bmax;
+		entry++;
+		entry %= priv->dma_tx_size;
+		entry_count++;
+
+		if (priv->plat->enh_desc)
+			desc = (struct dma_desc *)
+					(((struct dma_extended_desc *)desc)+1);
 		else
-			desc = priv->dma_tx + entry;
+			desc++;
+	}
+	if (len) {
 
-		desc->des2 = dma_map_single(priv->device, skb->data + bmax,
-					    len, DMA_TO_DEVICE);
+		desc->des2 = dma_map_single(priv->device,
+						data + 2*bmax*entry_count,
+						len, DMA_TO_DEVICE);
 		priv->tx_skbuff_dma[entry] = desc->des2;
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
-						STMMAC_RING_MODE);
-		wmb();
-		priv->hw->desc->set_tx_owner(desc);
 		priv->tx_skbuff[entry] = NULL;
-	} else {
-		desc->des2 = dma_map_single(priv->device, skb->data,
-					    nopaged_len, DMA_TO_DEVICE);
-		priv->tx_skbuff_dma[entry] = desc->des2;
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
+		desc->des3 = desc->des2 + bmax;
+		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
 						STMMAC_RING_MODE);
+		entry_count++;
 	}
 
-	return entry;
+	return entry_count;
 }
 
-static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
-{
-	unsigned int ret = 0;
-
-	if (len >= BUF_SIZE_4KiB)
-		ret = 1;
-
-	return ret;
-}
 
 static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 {
@@ -103,13 +90,13 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 	if (unlikely(priv->plat->has_gmac))
 		/* Fill DES3 in case of RING mode */
 		if (priv->dma_buf_sz >= BUF_SIZE_8KiB)
-			p->des3 = p->des2 + BUF_SIZE_8KiB;
+			p->des3 = p->des2 + BUF_SIZE_8KiB - 1;
 }
 
 /* In ring mode we need to fill the desc3 because it is used as buffer */
 static void stmmac_init_desc3(struct dma_desc *p)
 {
-	p->des3 = p->des2 + BUF_SIZE_8KiB;
+	p->des3 = p->des2 + BUF_SIZE_8KiB - 1;
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
@@ -127,8 +114,7 @@ static int stmmac_set_16kib_bfsize(int mtu)
 }
 
 const struct stmmac_ring_mode_ops ring_mode_ops = {
-	.is_jumbo_frm = stmmac_is_jumbo_frm,
-	.jumbo_frm = stmmac_jumbo_frm,
+	.prepare_frm = stmmac_prepare_frm,
 	.refill_desc3 = stmmac_refill_desc3,
 	.init_desc3 = stmmac_init_desc3,
 	.clean_desc3 = stmmac_clean_desc3,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index af04b5d..5873246 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1832,6 +1832,20 @@ static int stmmac_release(struct net_device *dev)
 	return 0;
 }
 
+
+static struct dma_desc *get_desc(struct stmmac_priv *priv, unsigned int entry)
+{
+	struct dma_desc *desc;
+
+	if (priv->plat->enh_desc)
+		desc = (struct dma_desc *)(priv->dma_etx + entry);
+	else
+		desc = priv->dma_tx + entry;
+
+	return  desc;
+}
+
+
 /**
  *  stmmac_xmit: Tx entry point of the driver
  *  @skb : the socket buffer
@@ -1844,11 +1858,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 	unsigned int txsize = priv->dma_tx_size;
-	unsigned int entry;
-	int i, csum_insertion = 0, is_jumbo = 0;
+	unsigned int entry, first_entry, nb_desc = 0;
+	int i, csum_insertion = 0;
 	int nfrags = skb_shinfo(skb)->nr_frags;
-	struct dma_desc *desc, *first;
-	unsigned int nopaged_len = skb_headlen(skb);
+	struct dma_desc *desc = NULL, *first;
 
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
 		if (!netif_queue_stopped(dev)) {
@@ -1858,73 +1871,53 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 		return NETDEV_TX_BUSY;
 	}
-
 	spin_lock(&priv->tx_lock);
 
 	if (priv->tx_path_in_lpi_mode)
 		stmmac_disable_eee_mode(priv);
 
-	entry = priv->cur_tx % txsize;
+	first_entry = entry = priv->cur_tx % txsize;
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
-
-	if (priv->extend_desc)
-		desc = (struct dma_desc *)(priv->dma_etx + entry);
+	/* To program the descriptors according to the size of the frame */
+	if (priv->mode == STMMAC_RING_MODE)
+		entry += priv->hw->ring->prepare_frm(priv, skb->data,
+				skb_headlen(skb), csum_insertion, entry);
 	else
-		desc = priv->dma_tx + entry;
+		entry += priv->hw->chain->prepare_frm(priv, skb->data,
+				 skb_headlen(skb), csum_insertion, entry);
 
-	first = desc;
-
-	priv->tx_skbuff[entry] = skb;
-
-	/* To program the descriptors according to the size of the frame */
-	if (priv->mode == STMMAC_RING_MODE) {
-		is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len,
-							priv->plat->enh_desc);
-		if (unlikely(is_jumbo))
-			entry = priv->hw->ring->jumbo_frm(priv, skb,
-							  csum_insertion);
-	} else {
-		is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len,
-							 priv->plat->enh_desc);
-		if (unlikely(is_jumbo))
-			entry = priv->hw->chain->jumbo_frm(priv, skb,
-							   csum_insertion);
-	}
-	if (likely(!is_jumbo)) {
-		desc->des2 = dma_map_single(priv->device, skb->data,
-					    nopaged_len, DMA_TO_DEVICE);
-		priv->tx_skbuff_dma[entry] = desc->des2;
-		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len,
-						csum_insertion, priv->mode);
-	} else
-		desc = first;
+	entry %= txsize;
 
 	for (i = 0; i < nfrags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-		int len = skb_frag_size(frag);
 
-		entry = (++priv->cur_tx) % txsize;
-		if (priv->extend_desc)
-			desc = (struct dma_desc *)(priv->dma_etx + entry);
+		if (priv->mode == STMMAC_RING_MODE)
+			entry += priv->hw->ring->prepare_frm(priv,
+						 skb_frag_address(frag),
+						 skb_frag_size(frag),
+						 csum_insertion, entry);
 		else
-			desc = priv->dma_tx + entry;
-
-		desc->des2 = skb_frag_dma_map(priv->device, frag, 0, len,
-					      DMA_TO_DEVICE);
-		priv->tx_skbuff_dma[entry] = desc->des2;
-		priv->tx_skbuff[entry] = NULL;
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
-						priv->mode);
-		wmb();
-		priv->hw->desc->set_tx_owner(desc);
-		wmb();
-	}
-
+			entry += priv->hw->chain->prepare_frm(priv,
+						 skb_frag_address(frag),
+						 skb_frag_size(frag),
+						 csum_insertion, entry);
+
+		entry %= txsize;
+	}
+	/*Set owner for all segment but the first one */
+	for (i = first_entry; i != entry;) {
+		desc = get_desc(priv, i);
+		nb_desc++;
+		if (i != first_entry)
+			priv->hw->desc->set_tx_owner(desc);
+		i++;
+		i %= txsize;
+	}
+	BUG_ON(desc == NULL);
 	/* Finalize the latest segment. */
 	priv->hw->desc->close_tx_desc(desc);
 
-	wmb();
 	/* According to the coalesce parameter the IC bit for the latest
 	 * segment could be reset and the timer re-started to invoke the
 	 * stmmac_tx function. This approach takes care about the fragments.
@@ -1938,11 +1931,20 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	} else
 		priv->tx_count_frames = 0;
 
+	/*Prepare first segment */
+	priv->tx_skbuff[first_entry] = skb;
+
+	first = get_desc(priv, first_entry);
+
+	priv->hw->desc->set_tx_first(first);
+
+	wmb();
+
 	/* To avoid raise condition */
 	priv->hw->desc->set_tx_owner(first);
 	wmb();
 
-	priv->cur_tx++;
+	priv->cur_tx += nb_desc;
 
 	if (netif_msg_pktdata(priv)) {
 		pr_debug("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d",
-- 
1.8.1.2

^ permalink raw reply related

* [PATCH RFC 5/5] net:stmmac: asynchronous tx_clean
From: Jimmy Perchet @ 2013-10-16 15:24 UTC (permalink / raw)
  To: peppe.cavallaro; +Cc: netdev, Jimmy Perchet
In-Reply-To: <1381937052-8999-1-git-send-email-jimmy.perchet@parrot.com>

Tx descriptor's cleanup and preparation are serialized, which is not necessary
and decrease performance.
In addition TX descriptor's cleanup is performed on NET_RX softirq, this is
confusing.

This patch unserialize tx descriptor's cleanup and preparation
and defer cleanup in workqueue.

Signed-off-by: Jimmy Perchet <jimmy.perchet@parrot.com>
---
 drivers/net/ethernet/stmicro/stmmac/chain_mode.c  |  6 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |  6 ++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 52 +++++++++++++----------
 3 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index d6ed0ce..8ab83cb 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -120,7 +120,7 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 		 * to keep explicit chaining in the descriptor.
 		 */
 		p->des3 = (unsigned int)(priv->dma_rx_phy +
-					 (((priv->dirty_rx) + 1) %
+					 ((priv->dirty_rx + 1) %
 					  priv->dma_rx_size) *
 					 sizeof(struct dma_desc));
 }
@@ -135,9 +135,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 		 * to keep explicit chaining in the descriptor.
 		 */
 		p->des3 = (unsigned int)(priv->dma_tx_phy +
-					 (((priv->dirty_tx + 1) %
+					 ((atomic_read(&priv->dirty_tx) + 1) %
 					   priv->dma_tx_size) *
-					  sizeof(struct dma_desc)));
+					  sizeof(struct dma_desc));
 }
 
 const struct stmmac_chain_mode_ops chain_mode_ops = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index f16a9bd..d5b8906 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -38,8 +38,8 @@ struct stmmac_priv {
 	struct dma_extended_desc *dma_etx ____cacheline_aligned_in_smp;
 	struct dma_desc *dma_tx;
 	struct sk_buff **tx_skbuff;
-	unsigned int cur_tx;
-	unsigned int dirty_tx;
+	atomic_t cur_tx;
+	atomic_t dirty_tx;
 	unsigned int dma_tx_size;
 	u32 tx_count_frames;
 	u32 tx_coal_frames;
@@ -106,6 +106,8 @@ struct stmmac_priv {
 	u32 adv_ts;
 	int use_riwt;
 	spinlock_t ptp_lock;
+	struct work_struct txclean_work;
+	struct workqueue_struct *wq;
 };
 
 extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5873246..de614ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -48,6 +48,7 @@
 #include <linux/seq_file.h>
 #endif /* CONFIG_STMMAC_DEBUG_FS */
 #include <linux/net_tstamp.h>
+#include <linux/workqueue.h>
 #include "stmmac_ptp.h"
 #include "stmmac.h"
 
@@ -205,7 +206,8 @@ static void print_pkt(unsigned char *buf, int len)
 
 static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
 {
-	return priv->dirty_tx + priv->dma_tx_size - priv->cur_tx - 1;
+	return atomic_read(&priv->dirty_tx) + priv->dma_tx_size
+					 - atomic_read(&priv->cur_tx) - 1;
 }
 
 /**
@@ -230,7 +232,7 @@ static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
 static void stmmac_enable_eee_mode(struct stmmac_priv *priv)
 {
 	/* Check and enter in LPI mode */
-	if ((priv->dirty_tx == priv->cur_tx) &&
+	if ((atomic_read(&priv->dirty_tx) == atomic_read(&priv->cur_tx)) &&
 	    (priv->tx_path_in_lpi_mode == false))
 		priv->hw->mac->set_eee_mode(priv->ioaddr);
 }
@@ -1118,8 +1120,8 @@ static int init_dma_desc_rings(struct net_device *dev)
 		priv->tx_skbuff[i] = NULL;
 	}
 
-	priv->dirty_tx = 0;
-	priv->cur_tx = 0;
+	atomic_set(&priv->dirty_tx, 0);
+	atomic_set(&priv->cur_tx, 0);
 
 	stmmac_clear_descriptors(priv);
 
@@ -1264,17 +1266,17 @@ static void stmmac_dma_operation_mode(int mtu, struct stmmac_priv *priv)
  * @priv: driver private structure
  * Description: it reclaims resources after transmission completes.
  */
-static void stmmac_tx_clean(struct stmmac_priv *priv)
+static void stmmac_tx_clean(struct work_struct *work)
 {
+	struct stmmac_priv *priv =
+			container_of(work, struct stmmac_priv, txclean_work);
 	unsigned int txsize = priv->dma_tx_size;
 
-	spin_lock(&priv->tx_lock);
-
 	priv->xstats.tx_clean++;
 
-	while (priv->dirty_tx != priv->cur_tx) {
+	while (atomic_read(&priv->dirty_tx) != atomic_read(&priv->cur_tx)) {
 		int last;
-		unsigned int entry = priv->dirty_tx % txsize;
+		unsigned int entry = atomic_read(&priv->dirty_tx) % txsize;
 		struct sk_buff *skb = priv->tx_skbuff[entry];
 		struct dma_desc *p;
 
@@ -1308,7 +1310,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 		}
 		if (netif_msg_tx_done(priv))
 			pr_debug("%s: curr %d, dirty %d\n", __func__,
-				 priv->cur_tx, priv->dirty_tx);
+				 atomic_read(&priv->cur_tx),
+				 atomic_read(&priv->dirty_tx));
 
 		if (likely(priv->tx_skbuff_dma[entry])) {
 			dma_unmap_single(priv->device,
@@ -1326,7 +1329,8 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 
 		priv->hw->desc->release_tx_desc(p, priv->mode);
 
-		priv->dirty_tx++;
+		wmb();
+		atomic_inc(&priv->dirty_tx);
 	}
 	if (unlikely(netif_queue_stopped(priv->dev) &&
 		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH(priv))) {
@@ -1344,7 +1348,6 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 		stmmac_enable_eee_mode(priv);
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
-	spin_unlock(&priv->tx_lock);
 }
 
 static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
@@ -1380,8 +1383,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 			priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
 						     priv->mode,
 						     (i == txsize - 1));
-	priv->dirty_tx = 0;
-	priv->cur_tx = 0;
+	atomic_set(&priv->dirty_tx, 0);
+	atomic_set(&priv->cur_tx, 0);
 	priv->hw->dma->start_tx(priv->ioaddr);
 
 	priv->dev->stats.tx_errors++;
@@ -1401,12 +1404,16 @@ static void stmmac_dma_interrupt(struct stmmac_priv *priv)
 	int status;
 
 	status = priv->hw->dma->dma_interrupt(priv->ioaddr, &priv->xstats);
-	if (likely((status & handle_rx)) || (status & handle_tx)) {
+	if (likely(status & handle_rx)) {
 		if (likely(napi_schedule_prep(&priv->napi))) {
 			stmmac_disable_dma_irq(priv);
 			__napi_schedule(&priv->napi);
 		}
 	}
+	if (likely(status & handle_tx))
+		queue_work(priv->wq, &priv->txclean_work);
+
+
 	if (unlikely(status & tx_hard_error_bump_tc)) {
 		/* Try to bump up the dma threshold on this failure */
 		if (unlikely(tc != SF_DMA_MODE) && (tc <= 256)) {
@@ -1596,8 +1603,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 static void stmmac_tx_timer(unsigned long data)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *)data;
-
-	stmmac_tx_clean(priv);
+	queue_work(priv->wq, &priv->txclean_work);
 }
 
 /**
@@ -1876,7 +1882,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (priv->tx_path_in_lpi_mode)
 		stmmac_disable_eee_mode(priv);
 
-	first_entry = entry = priv->cur_tx % txsize;
+	first_entry = entry = atomic_read(&priv->cur_tx) % txsize;
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 	/* To program the descriptors according to the size of the frame */
@@ -1944,12 +1950,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->hw->desc->set_tx_owner(first);
 	wmb();
 
-	priv->cur_tx += nb_desc;
+	atomic_add(nb_desc, &priv->cur_tx);
 
 	if (netif_msg_pktdata(priv)) {
 		pr_debug("%s: curr %d dirty=%d entry=%d, first=%p, nfrags=%d",
-			__func__, (priv->cur_tx % txsize),
-			(priv->dirty_tx % txsize), entry, first, nfrags);
+			__func__, (atomic_read(&priv->cur_tx) % txsize),
+			(atomic_read(&priv->dirty_tx) % txsize), entry,
+			first, nfrags);
 
 		if (priv->extend_desc)
 			stmmac_display_ring((void *)priv->dma_etx, txsize, 1);
@@ -2171,7 +2178,6 @@ static int stmmac_poll(struct napi_struct *napi, int budget)
 	int work_done = 0;
 
 	priv->xstats.napi_poll++;
-	stmmac_tx_clean(priv);
 
 	work_done = stmmac_rx(priv, budget);
 	if (work_done < budget) {
@@ -2747,6 +2753,8 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
+	priv->wq = create_singlethread_workqueue("stmmac_wq");
+	INIT_WORK(&priv->txclean_work, stmmac_tx_clean);
 
 	ret = register_netdev(ndev);
 	if (ret) {
-- 
1.8.1.2

^ permalink raw reply related

* Re: [PATCH v2 net 1/4] bridge: Don't use VID 0 and 4095 in vlan filtering
From: Stephen Hemminger @ 2013-10-16 15:47 UTC (permalink / raw)
  To: Toshiaki Makita; +Cc: David S . Miller, Vlad Yasevich, netdev, Toshiaki Makita
In-Reply-To: <1381910836-718-2-git-send-email-makita.toshiaki@lab.ntt.co.jp>

On Wed, 16 Oct 2013 17:07:13 +0900
Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:

> IEEE 802.1Q says that:
> - VID 0 shall not be configured as a PVID, or configured in any Filtering
> Database entry.
> - VID 4095 shall not be configured as a PVID, or transmitted in a tag
> header. This VID value may be used to indicate a wildcard match for the VID
> in management operations or Filtering Database entries.
> (See IEEE 802.1Q-2011 6.9.1 and Table 9-2)
> 
> Don't accept adding these VIDs in the vlan_filtering implementation.
> 
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  net/bridge/br_fdb.c     |  4 +-
>  net/bridge/br_netlink.c |  2 +-
>  net/bridge/br_vlan.c    | 97 +++++++++++++++++++++++--------------------------
>  3 files changed, 49 insertions(+), 54 deletions(-)

This one looks good, thanks.

Acked-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply

* Re: IPv6 path discovery oddities - flushing the routing cache resolves
From: Hannes Frederic Sowa @ 2013-10-16 15:48 UTC (permalink / raw)
  To: Valentijn Sessink; +Cc: netdev
In-Reply-To: <525E6B03.1040409@blub.net>

On Wed, Oct 16, 2013 at 12:31:31PM +0200, Valentijn Sessink wrote:
> Hello list,
> 
> I'm experiencing difficulties with IPv6 path discovery. The setup is 
> quite simple, a machine with native IPv6, no special routing - let's 
> call it the "server" for now. Unfortunately, I seem to loose 
> connectivity multiple times a day - and after digging in, I found this 
> to be "too big" messages that weren't honored at the server. The network 
> consists of something like:
> 
> server --- hosting --- others ---- SIXXS tunnel with 1280 MTU --- me.
> 
> A "ip -6 route list cache" would show a cached route to my "client", but 
> one without MTU. Then after "ip -6 route flush cache", and after trying 
> to send a large packet (for example issuing "ps uaxww" on an ssh 
> prompt), "ip -6 route list cache" will show a correct MTU.

Oh, thats another very good hint.

> But after a while, things start to go wrong again, and another "ip -6 
> route flush cache" is needed.
> 
> The server is running 3.8.0-something (ubuntu 12.04 with a newer kernel).
> 
> tcpdump shows that on reception of icmpv6 "too big", nothing happens 
> (i.e. the "too big" packet will be sent time and again), and after the 
> "ip -6 route flush cache", suddenly the "too big" message is honored.
> 
> I saw a couple of path discovery issues on this list, more specifically 
> one with the subject "IPv6 path MTU discovery broken" earlier this month 
> - but I'm not sure it's the same issue (because the original submitter 
> specifically mentions kernels 3.10 and 3.11 and has a much more 
> complicated routing table).

I do think these two issues are connected. Could you send me a the
corresponding ip route output and /proc/net/ipv6_route output for when it
works and mtus are correctly handled and when it does not work?

Thanks,

  Hannes

^ permalink raw reply

* Re: [PATCH v2 net 2/4] bridge: Apply the PVID to priority-tagged frames
From: Vlad Yasevich @ 2013-10-16 15:52 UTC (permalink / raw)
  To: Toshiaki Makita, David S . Miller, netdev; +Cc: Toshiaki Makita
In-Reply-To: <1381910836-718-3-git-send-email-makita.toshiaki@lab.ntt.co.jp>

On 10/16/2013 04:07 AM, Toshiaki Makita wrote:
> IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames
> use the PVID for the port as its VID.
> (See IEEE 802.1Q-2011 6.9.1 and Table 9-2)
>
> Apply the PVID to not only untagged frames but also priority-tagged frames.
>
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>

Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>

> ---
>   net/bridge/br_vlan.c | 27 ++++++++++++++++++++-------
>   1 file changed, 20 insertions(+), 7 deletions(-)
>
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 21b6d21..5a9c44a 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -189,6 +189,8 @@ out:
>   bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>   			struct sk_buff *skb, u16 *vid)
>   {
> +	int err;
> +
>   	/* If VLAN filtering is disabled on the bridge, all packets are
>   	 * permitted.
>   	 */
> @@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>   	if (!v)
>   		return false;
>
> -	if (br_vlan_get_tag(skb, vid)) {
> +	err = br_vlan_get_tag(skb, vid);
> +	if (!*vid) {
>   		u16 pvid = br_get_pvid(v);
>
> -		/* Frame did not have a tag.  See if pvid is set
> -		 * on this port.  That tells us which vlan untagged
> -		 * traffic belongs to.
> +		/* Frame had a tag with VID 0 or did not have a tag.
> +		 * See if pvid is set on this port.  That tells us which
> +		 * vlan untagged or priority-tagged traffic belongs to.
>   		 */
>   		if (pvid == VLAN_N_VID)
>   			return false;
>
> -		/* PVID is set on this port.  Any untagged ingress
> -		 * frame is considered to belong to this vlan.
> +		/* PVID is set on this port.  Any untagged or priority-tagged
> +		 * ingress frame is considered to belong to this vlan.
>   		 */
> -		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
> +		if (likely(err))
> +			/* Untagged Frame. */
> +			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
> +		else
> +			/* Priority-tagged Frame.
> +			 * At this point, We know that skb->vlan_tci had
> +			 * VLAN_TAG_PRESENT bit and its VID field was 0x000.
> +			 * We update only VID field and preserve PCP field.
> +			 */
> +			skb->vlan_tci |= pvid;
> +
>   		return true;
>   	}
>
>

^ permalink raw reply

* Re: [PATCH v2 net 2/4] bridge: Apply the PVID to priority-tagged frames
From: Stephen Hemminger @ 2013-10-16 15:55 UTC (permalink / raw)
  To: Toshiaki Makita; +Cc: David S . Miller, Vlad Yasevich, netdev, Toshiaki Makita
In-Reply-To: <1381910836-718-3-git-send-email-makita.toshiaki@lab.ntt.co.jp>

On Wed, 16 Oct 2013 17:07:14 +0900
Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:

> IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames
> use the PVID for the port as its VID.
> (See IEEE 802.1Q-2011 6.9.1 and Table 9-2)
> 
> Apply the PVID to not only untagged frames but also priority-tagged frames.
> 
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> ---
>  net/bridge/br_vlan.c | 27 ++++++++++++++++++++-------
>  1 file changed, 20 insertions(+), 7 deletions(-)
> 
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 21b6d21..5a9c44a 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -189,6 +189,8 @@ out:
>  bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>  			struct sk_buff *skb, u16 *vid)
>  {
> +	int err;
> +
>  	/* If VLAN filtering is disabled on the bridge, all packets are
>  	 * permitted.
>  	 */
> @@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>  	if (!v)
>  		return false;
>  
> -	if (br_vlan_get_tag(skb, vid)) {
> +	err = br_vlan_get_tag(skb, vid);
> +	if (!*vid) {
>  		u16 pvid = br_get_pvid(v);

Ok, but it looks like br_vlan_get_tag() could be cleaner if it just returned
the tag, and there was another br_vlan_tag_present() function.

Also, does this still work if CONFIG_BRIDGE_VLAN_FILTERING is disabled?

^ permalink raw reply

* Re: [PATCH v2 net 4/4] bridge: Fix updating FDB entries when the PVID is applied
From: Stephen Hemminger @ 2013-10-16 15:57 UTC (permalink / raw)
  To: Toshiaki Makita; +Cc: David S . Miller, Vlad Yasevich, netdev, Toshiaki Makita
In-Reply-To: <1381910836-718-5-git-send-email-makita.toshiaki@lab.ntt.co.jp>

On Wed, 16 Oct 2013 17:07:16 +0900
Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:

> We currently set the value that variable vid is pointing, which will be
> used in FDB later, to 0 at br_allowed_ingress() when we receive untagged
> or priority-tagged frames, even though the PVID is valid.
> This leads to FDB updates in such a wrong way that they are learned with
> VID 0.
> Update the value to that of PVID if the PVID is applied.
> 
> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
> Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
> ---
>  net/bridge/br_vlan.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
> index 5a9c44a..53f0990 100644
> --- a/net/bridge/br_vlan.c
> +++ b/net/bridge/br_vlan.c
> @@ -217,6 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>  		/* PVID is set on this port.  Any untagged or priority-tagged
>  		 * ingress frame is considered to belong to this vlan.
>  		 */
> +		*vid = pvid;
>  		if (likely(err))
>  			/* Untagged Frame. */
>  			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);


Ok, but side-effects seem like an indication of poor code logic
flow design. Not your fault but part of the the per-vlan filtering code.

^ permalink raw reply

* Re: SW csum errors
From: Kyle Hubert @ 2013-10-16 15:58 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Stephen Hemminger, netdev
In-Reply-To: <1381937065.2045.131.camel@edumazet-glaptop.roam.corp.google.com>

On Wed, Oct 16, 2013 at 11:24 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> On Wed, 2013-10-16 at 11:10 -0400, Kyle Hubert wrote:
>
>> Thanks, I didn't realize it was as simple as file backed pages being
>> changed. Yes, our device does support SG, so we do have zero-copy
>> sendfile() support. I'll concoct a simple test to prove this.
>
> You also can use vmsplice()/splice() and touch anonymous memory,
> no need to play with a file ;)

Thanks, this would be even easier to test. It also reminds me that
there are other vectors to constructing SKB frags. It looks like if we
need to enable SW checksumming ever again, we should just disable the
SG feature and let the normal stack handle validity.

-Kyle

^ permalink raw reply

* Re: [PATCH net-next v4 1/5] xen-netback: add support for IPv6 checksum offload to guest
From: Ian Campbell @ 2013-10-16 16:10 UTC (permalink / raw)
  To: Paul Durrant
  Cc: xen-devel@lists.xen.org, netdev@vger.kernel.org, Wei Liu,
	David Vrabel
In-Reply-To: <9AAE0902D5BC7E449B7C8E4E778ABCD0138C0B@AMSPEX01CL01.citrite.net>

On Mon, 2013-10-14 at 12:10 +0100, Paul Durrant wrote:
> > -----Original Message-----
> > From: Ian Campbell
> > Sent: 14 October 2013 11:54
> > To: Paul Durrant
> > Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David Vrabel
> > Subject: Re: [PATCH net-next v4 1/5] xen-netback: add support for IPv6
> > checksum offload to guest
> > 
> > On Fri, 2013-10-11 at 16:06 +0100, Paul Durrant wrote:
> > > Check xenstore flag feature-ipv6-csum-offload to determine if a
> > > guest is happy to accept IPv6 packets with only partial checksum.
> > > Also check analogous feature-ip-csum-offload to determine if a
> > > guest is happy to accept IPv4 packets with only partial checksum
> > > as a replacement for a negated feature-no-csum-offload value and
> > > add a comment to deprecate use of feature-no-csum-offload.
> > >
> > > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> > > Cc: Wei Liu <wei.liu2@citrix.com>
> > > Cc: David Vrabel <david.vrabel@citrix.com>
> > > Cc: Ian Campbell <ian.campbell@citrix.com>
> > 
> > Shouldn't this come later in the series, i.e. after netback is actually
> > able to cope with ipv6 offloads?
> > 
> 
> I guess that's debatable. The patches don't have any dependency relation; offloads to and from the guest are quite independent.
> 
> > > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-
> > netback/common.h
> > > index 5715318..b4a9a3c 100644
> > > --- a/drivers/net/xen-netback/common.h
> > > +++ b/drivers/net/xen-netback/common.h
> > > @@ -153,7 +153,8 @@ struct xenvif {
> > >  	u8 can_sg:1;
> > >  	u8 gso:1;
> > >  	u8 gso_prefix:1;
> > > -	u8 csum:1;
> > > +	u8 ip_csum:1;
> > > +	u8 ipv6_csum:1;
> > 
> > Why not ipv4_csum for consistency/unambiguity?
> > 
> 
> I followed general linux naming conventions e.g. ip_hdr and ipv6_hdr.

True. I would be more concerned about the netif.h name, but I think you
now intend to drop the v4 variant of that, so no worries.

^ permalink raw reply

* Re: [Xen-devel] DomU's network interface will hung when Dom0 running 32bit
From: David Vrabel @ 2013-10-16 16:11 UTC (permalink / raw)
  To: Wei Liu; +Cc: jianhai luan, xen-devel, annie li, Ian Campbell, netdev
In-Reply-To: <20131016151732.GJ16371@zion.uk.xensource.com>

On 16/10/13 16:17, Wei Liu wrote:
> On Wed, Oct 16, 2013 at 11:04:34PM +0800, jianhai luan wrote:
> [...]
>> >From ef02403a10173896c5c102f768741d0700b8a3a2 Mon Sep 17 00:00:00 2001
>> From: Jason Luan <jianhai.luan@oracle.com>
>> Date: Tue, 15 Oct 2013 17:07:49 +0800
>> Subject: [PATCH] xen-netback: pending timer only in the range [expire,
>>  next_credit)
>>
>> The function time_after_eq() do correct judge in range of MAX_UNLONG/2.
>> If net-front send lesser package, the delta between now and next_credit
>> will out of the range and time_after_eq() will do wrong judge in result
>> to net-front hung.  For example:
>>     expire    next_credit    ....    next_credit+MAX_UNLONG/2    now
>>     -----------------time increases this direction----------------->
>>
>> We should be add the environment which now beyond next_credit+MAX_UNLONG/2.
>> Because the fact now mustn't before expire, time_before(now, expire) == true
>> will show the environment.
>>     time_after_eq(now, next_credit) || time_before (now, expire)
>>     ==
>>     !time_in_range_open(now, expire, next_credit)
>>

I would like the description improved because it's too hard to understand.

How about something like:

"time_after_eq() only works if the delta is < MAX_ULONG/2.

If netfront sends at a very low rate, the time between subsequent calls
to tx_credit_exceeded() may exceed MAX_ULONG/2 and the test for
timer_after_eq() will be incorrect.  Credit will not be replenished and
the guest may become unable to send (e.g., if prior to the long gap, all
credit was exhausted)."

But that's as far as I get because I can't see how the fix is correct.
The time_in_range() test might still return the wrong value if now has
advanced even further and wrapped so it is between expire and
next_credit again.

I think the credit timeout should be always armed to expire in
MAX_ULONG/4 jiffies (or some other large value).  If credit is exceeded,
this timer is then adjusted to fire earlier (at next_credit as it does
already).

David

^ permalink raw reply

* Re: [PATCH v2 net 4/4] bridge: Fix updating FDB entries when the PVID is applied
From: Vlad Yasevich @ 2013-10-16 16:11 UTC (permalink / raw)
  To: Stephen Hemminger, Toshiaki Makita
  Cc: David S . Miller, netdev, Toshiaki Makita
In-Reply-To: <20131016085715.3442e8c3@nehalam.linuxnetplumber.net>

On 10/16/2013 11:57 AM, Stephen Hemminger wrote:
> On Wed, 16 Oct 2013 17:07:16 +0900
> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
>
>> We currently set the value that variable vid is pointing, which will be
>> used in FDB later, to 0 at br_allowed_ingress() when we receive untagged
>> or priority-tagged frames, even though the PVID is valid.
>> This leads to FDB updates in such a wrong way that they are learned with
>> VID 0.
>> Update the value to that of PVID if the PVID is applied.
>>
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> Reviewed-by: Vlad Yasevich <vyasevic@redhat.com>
>> ---
>>   net/bridge/br_vlan.c | 1 +
>>   1 file changed, 1 insertion(+)
>>
>> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
>> index 5a9c44a..53f0990 100644
>> --- a/net/bridge/br_vlan.c
>> +++ b/net/bridge/br_vlan.c
>> @@ -217,6 +217,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>>   		/* PVID is set on this port.  Any untagged or priority-tagged
>>   		 * ingress frame is considered to belong to this vlan.
>>   		 */
>> +		*vid = pvid;
>>   		if (likely(err))
>>   			/* Untagged Frame. */
>>   			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), pvid);
>
>
> Ok, but side-effects seem like an indication of poor code logic
> flow design. Not your fault but part of the the per-vlan filtering code.
>

I'll see if I can re-work the code to get rid of the side-effects.

-vlad

^ permalink raw reply

* Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add support for IPv6 checksum offload from guest
From: Ian Campbell @ 2013-10-16 16:11 UTC (permalink / raw)
  To: Paul Durrant
  Cc: David Vrabel, Wei Liu, netdev@vger.kernel.org,
	xen-devel@lists.xen.org
In-Reply-To: <9AAE0902D5BC7E449B7C8E4E778ABCD0138D3C@AMSPEX01CL01.citrite.net>

On Mon, 2013-10-14 at 13:34 +0100, Paul Durrant wrote:
> > -----Original Message-----
> > From: David Vrabel
> > Sent: 14 October 2013 13:19
> > To: Wei Liu
> > Cc: Paul Durrant; netdev@vger.kernel.org; Ian Campbell; David Vrabel; xen-
> > devel@lists.xen.org
> > Subject: Re: [Xen-devel] [PATCH net-next v4 2/5] xen-netback: add support
> > for IPv6 checksum offload from guest
> > 
> > On 14/10/13 11:55, Wei Liu wrote:
> > > On Mon, Oct 14, 2013 at 11:49:20AM +0100, Paul Durrant wrote:
> > >>> -----Original Message-----
> > >>> From: Wei Liu [mailto:wei.liu2@citrix.com]
> > >>> Sent: 14 October 2013 11:43
> > >>> To: Paul Durrant
> > >>> Cc: xen-devel@lists.xen.org; netdev@vger.kernel.org; Wei Liu; David
> > Vrabel;
> > >>> Ian Campbell
> > >>> Subject: Re: [PATCH net-next v4 2/5] xen-netback: add support for IPv6
> > >>> checksum offload from guest
> > >>>
> > >>> On Fri, Oct 11, 2013 at 04:06:19PM +0100, Paul Durrant wrote:
> > >>> [...]
> > >>>> -/*
> > >>>> - * This is the amount of packet we copy rather than map, so that the
> > >>>> - * guest can't fiddle with the contents of the headers while we do
> > >>>> - * packet processing on them (netfilter, routing, etc).
> > >>>> +/* This is a miniumum size for the linear area to avoid lots of
> > >>>> + * calls to __pskb_pull_tail() as we set up checksum offsets.
> > >>>>   */
> > >>>
> > >>> You seem to forget to explain why 128 is chosen. :-)
> > >>
> > >> Is that not sufficient explanation? What sort of thing are you looking for?
> > >>
> > >
> > >>From the second version of this patch, we had a conversation.
> > >
> > >> Where does 128 come from?
> > >>
> > >
> > > "It's just an arbitrary power of 2 that was chosen because it seems to
> > > cover most likely v6 headers and all v4 headers."
> > >
> > > So something like: "We choose 128 which is likely to cover most V6
> > > headers and all V4 headers" would be sufficeint.
> > 
> > Is "most IPv6 headers" actually good enough?  Don't we need to ensure
> > netback copies all IP headers?
> > 
> 
> It will do if checksum offload is in use, but perhaps the pull as far
> as the transport header needs to be done anyway? I'm unsure of the
> expectations of other code.

I've always been under the impression that transport headers needed
pulling up too, for the benefit of netfilter perhaps?

AIUI the frags should be pure "payload". I may be wrong about that
though...

Ian.

^ permalink raw reply

* Re: [PATCH 0/5] rfkill-gpio: ACPI support
From: Rhyland Klein @ 2013-10-16 16:13 UTC (permalink / raw)
  To: Heikki Krogerus
  Cc: John W. Linville, Johannes Berg, Rafael J. Wysocki,
	linux-acpi@vger.kernel.org, linux-wireless@vger.kernel.org,
	netdev@vger.kernel.org
In-Reply-To: <1381920823-15403-1-git-send-email-heikki.krogerus@linux.intel.com>

On 10/16/2013 6:53 AM, Heikki Krogerus wrote:
> Hi,
> 
> The first patches prepare the driver for the support. The last patch
> can then add the support quite easily. With these patches, adding DT
> support later will be quite straight forward if someone needs it.
> 
> 
> Heikki Krogerus (5):
>   net: rfkill: gpio: convert to resource managed allocation
>   net: rfkill: gpio: clean up clock handling
>   net: rfkill: gpio: spinlock-safe GPIO access
>   net: rfkill: gpio: prepare for DT and ACPI support
>   net: rfkill: gpio: add ACPI support
> 
>  net/rfkill/Kconfig       |   2 +-
>  net/rfkill/rfkill-gpio.c | 211 ++++++++++++++++++++++-------------------------
>  2 files changed, 99 insertions(+), 114 deletions(-)
> 

Strictly speaking, duplicating the pdata fields into the
rfkill_gpio_data structure isn't really necessary. Many drivers simply
have the dt parsing or in this case ACPI parsing generate a
platform_data structure which would then be saved in the
rfkill_gpio_data structure.

In this case, since it is only a few fields, I am not too worried and I
am fine either way.

Acked-by: Rhyland Klein <rklein@nvidia.com>

-- 
nvpublic

^ permalink raw reply

* Re: [PATCH net-next v4 4/5] xen-netback: handle IPv6 TCP GSO packets from the guest
From: Ian Campbell @ 2013-10-16 16:15 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, netdev, Wei Liu, David Vrabel
In-Reply-To: <1381503982-1418-5-git-send-email-paul.durrant@citrix.com>

On Fri, 2013-10-11 at 16:06 +0100, Paul Durrant wrote:
> This patch adds a xenstore feature flag, festure-gso-tcpv6, to advertise
> that netback can handle IPv6 TCP GSO packets. It creates SKB_GSO_TCPV6 skbs
> if the frontend passes an extra segment with the new type
> XEN_NETIF_GSO_TYPE_TCPV6 added to netif.h.
> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>

This patch looks fine but should it not be after the following patch
which actually causes the necessary pull ups to happen?

> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> Cc: Ian Campbell <ian.campbell@citrix.com>
> ---
>  drivers/net/xen-netback/netback.c |   11 ++++++++---
>  drivers/net/xen-netback/xenbus.c  |    7 +++++++
>  include/xen/interface/io/netif.h  |   10 +++++++++-
>  3 files changed, 24 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 74c13b9..65f04e7 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -1096,15 +1096,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
>  		return -EINVAL;
>  	}
>  
> -	/* Currently only TCPv4 S.O. is supported. */
> -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> +	switch (gso->u.gso.type) {
> +	case XEN_NETIF_GSO_TYPE_TCPV4:
> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> +		break;
> +	case XEN_NETIF_GSO_TYPE_TCPV6:
> +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> +		break;
> +	default:
>  		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
>  		xenvif_fatal_tx_err(vif);
>  		return -EINVAL;
>  	}
>  
>  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
>  
>  	/* Header must be checked, and gso_segs computed. */
>  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
> index 9c9b37d..7e4dcc9 100644
> --- a/drivers/net/xen-netback/xenbus.c
> +++ b/drivers/net/xen-netback/xenbus.c
> @@ -105,6 +105,13 @@ static int netback_probe(struct xenbus_device *dev,
>  			goto abort_transaction;
>  		}
>  
> +		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
> +				    "%d", sg);
> +		if (err) {
> +			message = "writing feature-gso-tcpv6";
> +			goto abort_transaction;
> +		}
> +
>  		/* We support partial checksum setup for IPv6 packets */
>  		err = xenbus_printf(xbt, dev->nodename,
>  				    "feature-ipv6-csum-offload",
> diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
> index d9fb44739..d7dd8d7 100644
> --- a/include/xen/interface/io/netif.h
> +++ b/include/xen/interface/io/netif.h
> @@ -61,6 +61,13 @@
>   */
>  
>  /*
> + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
> + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
> + * frontends nor backends are assumed to be capable unless the flags are
> + * present.
> + */
> +
> +/*
>   * This is the 'wire' format for packets:
>   *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags)
>   * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info)
> @@ -105,8 +112,9 @@ struct xen_netif_tx_request {
>  #define _XEN_NETIF_EXTRA_FLAG_MORE	(0)
>  #define  XEN_NETIF_EXTRA_FLAG_MORE	(1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
>  
> -/* GSO types - only TCPv4 currently supported. */
> +/* GSO types */
>  #define XEN_NETIF_GSO_TYPE_TCPV4	(1)
> +#define XEN_NETIF_GSO_TYPE_TCPV6	(2)
>  
>  /*
>   * This structure needs to fit within both netif_tx_request and

^ permalink raw reply

* Re: [PATCH v2 net 2/4] bridge: Apply the PVID to priority-tagged frames
From: Vlad Yasevich @ 2013-10-16 16:16 UTC (permalink / raw)
  To: Stephen Hemminger, Toshiaki Makita
  Cc: David S . Miller, netdev, Toshiaki Makita
In-Reply-To: <20131016085537.1cbe9c37@nehalam.linuxnetplumber.net>

On 10/16/2013 11:55 AM, Stephen Hemminger wrote:
> On Wed, 16 Oct 2013 17:07:14 +0900
> Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp> wrote:
>
>> IEEE 802.1Q says that when we receive priority-tagged (VID 0) frames
>> use the PVID for the port as its VID.
>> (See IEEE 802.1Q-2011 6.9.1 and Table 9-2)
>>
>> Apply the PVID to not only untagged frames but also priority-tagged frames.
>>
>> Signed-off-by: Toshiaki Makita <makita.toshiaki@lab.ntt.co.jp>
>> ---
>>   net/bridge/br_vlan.c | 27 ++++++++++++++++++++-------
>>   1 file changed, 20 insertions(+), 7 deletions(-)
>>
>> diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
>> index 21b6d21..5a9c44a 100644
>> --- a/net/bridge/br_vlan.c
>> +++ b/net/bridge/br_vlan.c
>> @@ -189,6 +189,8 @@ out:
>>   bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>>   			struct sk_buff *skb, u16 *vid)
>>   {
>> +	int err;
>> +
>>   	/* If VLAN filtering is disabled on the bridge, all packets are
>>   	 * permitted.
>>   	 */
>> @@ -201,20 +203,31 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v,
>>   	if (!v)
>>   		return false;
>>
>> -	if (br_vlan_get_tag(skb, vid)) {
>> +	err = br_vlan_get_tag(skb, vid);
>> +	if (!*vid) {
>>   		u16 pvid = br_get_pvid(v);
>
> Ok, but it looks like br_vlan_get_tag() could be cleaner if it just returned
> the tag, and there was another br_vlan_tag_present() function.

I was just thinking about that as well.  If we make br_vlan_get_tag()
return either the actual tag (if the packet is tagged), or the pvid
if (untagged/prio_tagged), then we can skp most of this.

>
> Also, does this still work if CONFIG_BRIDGE_VLAN_FILTERING is disabled?

Yes.  br_allowed_ingress becomes an inline if the config option is disabled.

-vlad

^ permalink raw reply

* Re: [PATCH net-next v4 4/5] xen-netback: handle IPv6 TCP GSO packets from the guest
From: Ian Campbell @ 2013-10-16 16:16 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, netdev, Wei Liu, David Vrabel
In-Reply-To: <1381940113.30409.5.camel@kazak.uk.xensource.com>

On Wed, 2013-10-16 at 17:15 +0100, Ian Campbell wrote:
> On Fri, 2013-10-11 at 16:06 +0100, Paul Durrant wrote:
> > This patch adds a xenstore feature flag, festure-gso-tcpv6, to advertise
> > that netback can handle IPv6 TCP GSO packets. It creates SKB_GSO_TCPV6 skbs
> > if the frontend passes an extra segment with the new type
> > XEN_NETIF_GSO_TYPE_TCPV6 added to netif.h.
> > 
> > Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> 
> This patch looks fine but should it not be after the following patch
> which actually causes the necessary pull ups to happen?

Sorry, it is, I shouldn't review series in two halves, forget what I've
already seen!

> > Cc: Wei Liu <wei.liu2@citrix.com>
> > Cc: David Vrabel <david.vrabel@citrix.com>

Acked-by: Ian Campbell <ian.campbell@citrix.com>

> > ---
> >  drivers/net/xen-netback/netback.c |   11 ++++++++---
> >  drivers/net/xen-netback/xenbus.c  |    7 +++++++
> >  include/xen/interface/io/netif.h  |   10 +++++++++-
> >  3 files changed, 24 insertions(+), 4 deletions(-)
> > 
> > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> > index 74c13b9..65f04e7 100644
> > --- a/drivers/net/xen-netback/netback.c
> > +++ b/drivers/net/xen-netback/netback.c
> > @@ -1096,15 +1096,20 @@ static int xenvif_set_skb_gso(struct xenvif *vif,
> >  		return -EINVAL;
> >  	}
> >  
> > -	/* Currently only TCPv4 S.O. is supported. */
> > -	if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) {
> > +	switch (gso->u.gso.type) {
> > +	case XEN_NETIF_GSO_TYPE_TCPV4:
> > +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> > +		break;
> > +	case XEN_NETIF_GSO_TYPE_TCPV6:
> > +		skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
> > +		break;
> > +	default:
> >  		netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type);
> >  		xenvif_fatal_tx_err(vif);
> >  		return -EINVAL;
> >  	}
> >  
> >  	skb_shinfo(skb)->gso_size = gso->u.gso.size;
> > -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
> >  
> >  	/* Header must be checked, and gso_segs computed. */
> >  	skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
> > diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
> > index 9c9b37d..7e4dcc9 100644
> > --- a/drivers/net/xen-netback/xenbus.c
> > +++ b/drivers/net/xen-netback/xenbus.c
> > @@ -105,6 +105,13 @@ static int netback_probe(struct xenbus_device *dev,
> >  			goto abort_transaction;
> >  		}
> >  
> > +		err = xenbus_printf(xbt, dev->nodename, "feature-gso-tcpv6",
> > +				    "%d", sg);
> > +		if (err) {
> > +			message = "writing feature-gso-tcpv6";
> > +			goto abort_transaction;
> > +		}
> > +
> >  		/* We support partial checksum setup for IPv6 packets */
> >  		err = xenbus_printf(xbt, dev->nodename,
> >  				    "feature-ipv6-csum-offload",
> > diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
> > index d9fb44739..d7dd8d7 100644
> > --- a/include/xen/interface/io/netif.h
> > +++ b/include/xen/interface/io/netif.h
> > @@ -61,6 +61,13 @@
> >   */
> >  
> >  /*
> > + * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
> > + * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
> > + * frontends nor backends are assumed to be capable unless the flags are
> > + * present.
> > + */
> > +
> > +/*
> >   * This is the 'wire' format for packets:
> >   *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags)
> >   * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info)
> > @@ -105,8 +112,9 @@ struct xen_netif_tx_request {
> >  #define _XEN_NETIF_EXTRA_FLAG_MORE	(0)
> >  #define  XEN_NETIF_EXTRA_FLAG_MORE	(1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
> >  
> > -/* GSO types - only TCPv4 currently supported. */
> > +/* GSO types */
> >  #define XEN_NETIF_GSO_TYPE_TCPV4	(1)
> > +#define XEN_NETIF_GSO_TYPE_TCPV6	(2)
> >  
> >  /*
> >   * This structure needs to fit within both netif_tx_request and
> 

^ permalink raw reply

* Re: [Xen-devel] [PATCH net-next v4 0/5] xen-netback: IPv6 offload support
From: Ian Campbell @ 2013-10-16 16:20 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, netdev
In-Reply-To: <1381503982-1418-1-git-send-email-paul.durrant@citrix.com>

On Fri, 2013-10-11 at 16:06 +0100, Paul Durrant wrote:
> This patch series adds support for checksum and large packet offloads into
> xen-netback.
> Testing has mainly been done using the Microsoft network hardware
> certification suite running in Server 2008R2 VMs with Citrix PV frontends.

Are there any Linux netfront patches in existence/the pipeline to take
advantage of this?

> 
> v2:
> - Fixed Wei's email address in Cc lines
> 
> v3:
> - Responded to Wei's comments:
>  - netif.h now updated with comments and a definition of
>    XEN_NETIF_GSO_TYPE_NONE.
>  - limited number of pullups
> - Responded to Annie's comments:
>  - New GSO_BIT macro
> 
> v4:
> - Responded to more of Wei's comments
> - Remove parsing of IPv6 fragment header and added warning
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@lists.xen.org
> http://lists.xen.org/xen-devel

^ permalink raw reply

* Re: [PATCH] WAN: Adding support for Infineon PEF2256 E1 chipset
From: Rob Herring @ 2013-10-16 16:24 UTC (permalink / raw)
  To: Christophe Leroy
  Cc: Rob Herring, Pawel Moll, Mark Rutland, Stephen Warren,
	Ian Campbell, Rob Landley, Grant Likely, Krzysztof Halasa,
	devicetree@vger.kernel.org, linux-doc,
	linux-kernel@vger.kernel.org, netdev, jerome.chantelauze
In-Reply-To: <201310161525.r9GFPZI5006238@localhost.localdomain>

On Wed, Oct 16, 2013 at 10:25 AM, Christophe Leroy
<christophe.leroy@c-s.fr> wrote:
> The patch adds WAN support for Infineon PEF2256 E1 Chipset.
>
> Signed-off-by: Jerome Chantelauze <jerome.chantelauze@c-s.fr>
> Acked-by: Christophe Leroy <christophe.leroy@c-s.fr>

[snip]

> diff -urN a/Documentation/devicetree/bindings/net/pef2256.txt b/Documentation/devicetree/bindings/net/pef2256.txt
> --- a/Documentation/devicetree/bindings/net/pef2256.txt 1970-01-01 01:00:00.000000000 +0100
> +++ b/Documentation/devicetree/bindings/net/pef2256.txt 2013-10-13 15:05:42.000000000 +0200
> @@ -0,0 +1,29 @@
> +* Wan on Infineon pef2256 E1 controller
> +
> +Required properties:
> +- compatible: Should be "infineon,pef2256"
> +- reg: Address and length of the register set for the device
> +- interrupts: Should contain interrupts
> +
> +Optional properties:
> +- data-rate: Data rate on the system highway.
> +  Supported values are: 2, 4, 8, 16.
> +  8 if not defined.

What are the units? Specify them in the property name.

> +- channel-phase: First time slot transmission channel phase.
> +  Supported values are: 0, 1, 2, 3, 4, 5, 6, 7.
> +  0 if not defined.

This description basically tells me nothing.

> +- rising-edge-sync-pulse: rising edge synchronous pulse.
> +  Supported values are: "receive", "transmit".
> +  "transmit" if not defined.

Are receive and transmit mutually exclusive? If so, then wouldn't a
single property like "rx-rising-edge-sync-pulse" be sufficient.

> +
> +Examples:
> +
> +       e1-wan@4,2000000 {
> +               compatible = "infineon,pef2256";
> +               reg = <4 0x2000000 0xFF>;
> +               interrupts = <8 1>;
> +               interrupt-parent = <&PIC>;
> +               data-rate = <4>;
> +               channel-phase = <1>;
> +               rising-edge-sync-pulse = "transmit";
> +       };
> diff -urN a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile
> --- a/drivers/net/wan/Makefile  1970-01-01 01:00:00.000000000 +0100
> +++ b/drivers/net/wan/Makefile  2013-10-13 13:05:01.000000000 +0200
> @@ -22,6 +22,7 @@
>  obj-$(CONFIG_COSA)             += cosa.o
>  obj-$(CONFIG_FARSYNC)          += farsync.o
>  obj-$(CONFIG_DSCC4)             += dscc4.o
> +obj-$(CONFIG_PEF2256)           += pef2256.o
>  obj-$(CONFIG_X25_ASY)          += x25_asy.o
>
>  obj-$(CONFIG_LANMEDIA)         += lmc/
> diff -urN a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
> --- a/drivers/net/wan/Kconfig   1970-01-01 01:00:00.000000000 +0100
> +++ b/drivers/net/wan/Kconfig   2013-10-13 13:05:01.000000000 +0200
> @@ -266,6 +266,16 @@
>           To compile this driver as a module, choose M here: the
>           module will be called farsync.
>
> +config PEF2256
> +       tristate "PEF2256 support"
> +       depends on HDLC && OF && SYSFS

It would be better if this can build without OF selected.

Rob

^ permalink raw reply

* [PATCH net-next] tcp: remove redundant code in __tcp_retransmit_skb()
From: Neal Cardwell @ 2013-10-16 16:36 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, Neal Cardwell, Eric Dumazet, Yuchung Cheng

Remove the specialized code in __tcp_retransmit_skb() that tries to trim
any ACKed payload preceding a FIN before we retransmit (this was added
in 1999 in v2.2.3pre3). This trimming code was made unreachable by the
more general code added above it that uses tcp_trim_head() to trim any
ACKed payload, with or without a FIN (this was added in "[NET]: Add
segmentation offload support to TCP." in 2002 circa v2.5.33).

Signed-off-by: Neal Cardwell <ncardwell@google.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
---
 net/ipv4/tcp_output.c | 15 ---------------
 1 file changed, 15 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e5ce0e1..ce7c4d9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2351,21 +2351,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 
 	tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	/* Some Solaris stacks overoptimize and ignore the FIN on a
-	 * retransmit when old data is attached.  So strip it off
-	 * since it is cheap to do so and saves bytes on the network.
-	 */
-	if (skb->len > 0 &&
-	    (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) &&
-	    tp->snd_una == (TCP_SKB_CB(skb)->end_seq - 1)) {
-		if (!pskb_trim(skb, 0)) {
-			/* Reuse, even though it does some unnecessary work */
-			tcp_init_nondata_skb(skb, TCP_SKB_CB(skb)->end_seq - 1,
-					     TCP_SKB_CB(skb)->tcp_flags);
-			skb->ip_summed = CHECKSUM_NONE;
-		}
-	}
-
 	/* Make a copy, if the first transmission SKB clone we made
 	 * is still in somebody's hands, else make a clone.
 	 */
-- 
1.8.4

^ permalink raw reply related

* Re: [PATCH net-next] tcp: remove redundant code in __tcp_retransmit_skb()
From: Eric Dumazet @ 2013-10-16 16:44 UTC (permalink / raw)
  To: Neal Cardwell; +Cc: David Miller, netdev, Eric Dumazet, Yuchung Cheng
In-Reply-To: <1381941411-30645-1-git-send-email-ncardwell@google.com>

On Wed, 2013-10-16 at 12:36 -0400, Neal Cardwell wrote:
> Remove the specialized code in __tcp_retransmit_skb() that tries to trim
> any ACKed payload preceding a FIN before we retransmit (this was added
> in 1999 in v2.2.3pre3). This trimming code was made unreachable by the
> more general code added above it that uses tcp_trim_head() to trim any
> ACKed payload, with or without a FIN (this was added in "[NET]: Add
> segmentation offload support to TCP." in 2002 circa v2.5.33).
> 
> Signed-off-by: Neal Cardwell <ncardwell@google.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: Yuchung Cheng <ycheng@google.com>
> ---
>  net/ipv4/tcp_output.c | 15 ---------------
>  1 file changed, 15 deletions(-)

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [Xen-devel] DomU's network interface will hung when Dom0 running 32bit
From: jianhai luan @ 2013-10-16 16:44 UTC (permalink / raw)
  To: David Vrabel, Wei Liu; +Cc: xen-devel, annie li, Ian Campbell, netdev
In-Reply-To: <525EBABB.4070906@citrix.com>


On 2013-10-17 0:11, David Vrabel wrote:
> On 16/10/13 16:17, Wei Liu wrote:
>> On Wed, Oct 16, 2013 at 11:04:34PM +0800, jianhai luan wrote:
>> [...]
>>> >From ef02403a10173896c5c102f768741d0700b8a3a2 Mon Sep 17 00:00:00 2001
>>> From: Jason Luan <jianhai.luan@oracle.com>
>>> Date: Tue, 15 Oct 2013 17:07:49 +0800
>>> Subject: [PATCH] xen-netback: pending timer only in the range [expire,
>>>   next_credit)
>>>
>>> The function time_after_eq() do correct judge in range of MAX_UNLONG/2.
>>> If net-front send lesser package, the delta between now and next_credit
>>> will out of the range and time_after_eq() will do wrong judge in result
>>> to net-front hung.  For example:
>>>      expire    next_credit    ....    next_credit+MAX_UNLONG/2    now
>>>      -----------------time increases this direction----------------->
>>>
>>> We should be add the environment which now beyond next_credit+MAX_UNLONG/2.
>>> Because the fact now mustn't before expire, time_before(now, expire) == true
>>> will show the environment.
>>>      time_after_eq(now, next_credit) || time_before (now, expire)
>>>      ==
>>>      !time_in_range_open(now, expire, next_credit)
>>>
> I would like the description improved because it's too hard to understand.
>
> How about something like:
>
> "time_after_eq() only works if the delta is < MAX_ULONG/2.
>
> If netfront sends at a very low rate, the time between subsequent calls
> to tx_credit_exceeded() may exceed MAX_ULONG/2 and the test for
> timer_after_eq() will be incorrect.  Credit will not be replenished and
> the guest may become unable to send (e.g., if prior to the long gap, all
> credit was exhausted)."

Thanks your description, i will accept it. :)
>
> But that's as far as I get because I can't see how the fix is correct.
> The time_in_range() test might still return the wrong value if now has
> advanced even further and wrapped so it is between expire and
> next_credit again.

typo, time_in_range() should be time_in_range_open().
Yes, if now have advanced even further and wrapped, it will always fall 
in [ expire, next_credit). In the range, please think two scenario:
    * No transmit limit: expire == next_credit, the range will be zero, 
replenish will always be done.
    * Transmit limit: Because guest may be consume all credit_bytes in 
very short time, other time in [expire, next_credit) will don't send any 
package. So the time which don't send package should be think about when 
we set the rate parameter.
       So if now fall in the range, the hung time should be acceptable. 
(if rate=10000M/s, the worse time will be 4s).
>
> I think the credit timeout should be always armed to expire in
> MAX_ULONG/4 jiffies (or some other large value).  If credit is exceeded,
> this timer is then adjusted to fire earlier (at next_credit as it does
> already).

Setting timer may be fixed the issue. But i don't think how to verify 
the fixed expect waiting 180 days. I verified the above patch only 
change expire's value to emulator the scenario.
>
> David

Jason.

^ permalink raw reply

* Re: [PATCH net-next v4 5/5] xen-netback: enable IPv6 TCP GSO to the guest
From: Ian Campbell @ 2013-10-16 16:49 UTC (permalink / raw)
  To: Paul Durrant; +Cc: xen-devel, netdev, Wei Liu, David Vrabel
In-Reply-To: <1381503982-1418-6-git-send-email-paul.durrant@citrix.com>

On Fri, 2013-10-11 at 16:06 +0100, Paul Durrant wrote:
> This patch adds code to handle SKB_GSO_TCPV6 skbs and construct appropriate
> extra or prefix segments to pass the large packet to the frontend. New
> xenstore flags, feature-gso-tcpv6 and feature-gso-tcpv6-prefix, are sampled
> to determine if the frontend is capable of handling such packets.

IIRC the reason we have feature-gso-tcpv4 and feature-gso-tcpv4-prefix
is that the former did things in a way which Windows couldn't cope with.
I assuming that is true for v6 too. But could Linux cope with the prefix
version too for v6 and reduce the number of options? Or is the
non-prefix variant actually better, if the guest can manage, for some
reason?

I suppose in the end its all piggybacking off the v4 code paths so
supporting both isn't a hardship.


> 
> Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
> Cc: Wei Liu <wei.liu2@citrix.com>
> Cc: David Vrabel <david.vrabel@citrix.com>
> Cc: Ian Campbell <ian.campbell@citrix.com>
> ---
>  drivers/net/xen-netback/common.h    |    9 +++++--
>  drivers/net/xen-netback/interface.c |    6 +++--
>  drivers/net/xen-netback/netback.c   |   48 +++++++++++++++++++++++++++--------
>  drivers/net/xen-netback/xenbus.c    |   29 +++++++++++++++++++--
>  include/xen/interface/io/netif.h    |    1 +
>  5 files changed, 77 insertions(+), 16 deletions(-)
> 
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index b4a9a3c..55b8dec 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -87,9 +87,13 @@ struct pending_tx_info {
>  struct xenvif_rx_meta {
>  	int id;
>  	int size;
> +	int gso_type;
>  	int gso_size;
>  };
>  
> +#define GSO_BIT(type) \
> +	(1 << XEN_NETIF_GSO_TYPE_ ## type)
> +
>  /* Discriminate from any valid pending_idx value. */
>  #define INVALID_PENDING_IDX 0xFFFF
>  
> @@ -150,9 +154,10 @@ struct xenvif {
>  	u8               fe_dev_addr[6];
>  
>  	/* Frontend feature information. */
> +	int gso_mask;
> +	int gso_prefix_mask;

Are these storing NETIF_F_FOO? In which case they should be
netif_feature_t I think. At the least it needs to be unsigned.

> +
>  	u8 can_sg:1;
> -	u8 gso:1;
> -	u8 gso_prefix:1;
>  	u8 ip_csum:1;
>  	u8 ipv6_csum:1;
>  
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
> index cb0d8ea..e4aa267 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -214,8 +214,10 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
>  
>  	if (!vif->can_sg)
>  		features &= ~NETIF_F_SG;
> -	if (!vif->gso && !vif->gso_prefix)
> +	if (~(vif->gso_mask | vif->gso_prefix_mask) & GSO_BIT(TCPV4))

I must be blind -- where does this GSO_BIT macro come from?

I can't see it in current net-next...

> @@ -392,7 +394,14 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
>  		}
>  
>  		/* Leave a gap for the GSO descriptor. */
> -		if (*head && skb_shinfo(skb)->gso_size && !vif->gso_prefix)
> +		if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4)
> +			gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
> +		else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6)

I think you can use skb_is_gso(skb) and skb_is_gso_v6(skb) for these
ifs.

> +			gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
> +		else
> +			gso_type = XEN_NETIF_GSO_TYPE_NONE;
> +
> +		if (*head && ((1 << gso_type) & vif->gso_mask))

Perhaps test_bit(gso_type, &vif->gso_mask) rather than opencoding
bitops?

I see there are a lot of these, a vif_can_gso_type(vif, gso_type) helper
might be nice.

>  			vif->rx.req_cons++;
>  
>  		*head = 0; /* There must be something in this buffer now. */
> @@ -423,14 +432,28 @@ static int xenvif_gop_skb(struct sk_buff *skb,
>  	unsigned char *data;
>  	int head = 1;
>  	int old_meta_prod;
> +	int gso_type;
> +	int gso_size;
>  
>  	old_meta_prod = npo->meta_prod;
>  
> +	if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) {
> +		gso_type = XEN_NETIF_GSO_TYPE_TCPV4;
> +		gso_size = skb_shinfo(skb)->gso_size;
> +	} else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) {
> +		gso_type = XEN_NETIF_GSO_TYPE_TCPV6;
> +		gso_size = skb_shinfo(skb)->gso_size;
> +	} else {
> +		gso_type = XEN_NETIF_GSO_TYPE_NONE;
> +		gso_size = 0;
> +	}
> +
>  	/* Set up a GSO prefix descriptor, if necessary */
> -	if (skb_shinfo(skb)->gso_size && vif->gso_prefix) {
> +	if ((1 << skb_shinfo(skb)->gso_type) & vif->gso_prefix_mask) {

Isn't skb->gso_type a Linux GSO flag thing and vif->gso_prefix_mask a
Xen netif.h GSO type? Are they really comparable in this way?

>  		req = RING_GET_REQUEST(&vif->rx, vif->rx.req_cons++);
>  		meta = npo->meta + npo->meta_prod++;
> -		meta->gso_size = skb_shinfo(skb)->gso_size;
> +		meta->gso_type = gso_type;
> +		meta->gso_size = gso_size;
>  		meta->size = 0;
>  		meta->id = req->id;
>  	}

> +	if (vif->gso_mask & vif->gso_prefix_mask) {
> +		xenbus_dev_fatal(dev, err,
> +				 "%s: gso and gso prefix flags are not "
> +				 "mutually exclusive",

Aren't they? I thought they were? Maybe I'm reading this error message
backwards, in which case I would contend that it is written
backwards ;-)

Ian.

^ permalink raw reply

* [PATCH net-next v5 1/5] xen-netback: add support for IPv6 checksum offload to guest
From: Paul Durrant @ 2013-10-16 16:50 UTC (permalink / raw)
  To: xen-devel, netdev; +Cc: Paul Durrant, Wei Liu, David Vrabel, Ian Campbell
In-Reply-To: <1381942232-26268-1-git-send-email-paul.durrant@citrix.com>

Check xenstore flag feature-ipv6-csum-offload to determine if a
guest is happy to accept IPv6 packets with only partial checksum.

Signed-off-by: Paul Durrant <paul.durrant@citrix.com>
Cc: Wei Liu <wei.liu2@citrix.com>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/net/xen-netback/common.h    |    3 ++-
 drivers/net/xen-netback/interface.c |   10 +++++++---
 drivers/net/xen-netback/xenbus.c    |    7 ++++++-
 include/xen/interface/io/netif.h    |    7 +++++++
 4 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 5715318..b4a9a3c 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -153,7 +153,8 @@ struct xenvif {
 	u8 can_sg:1;
 	u8 gso:1;
 	u8 gso_prefix:1;
-	u8 csum:1;
+	u8 ip_csum:1;
+	u8 ipv6_csum:1;
 
 	/* Internal feature information. */
 	u8 can_queue:1;	    /* can queue packets for receiver? */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 01bb854..8e92783 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -216,8 +216,10 @@ static netdev_features_t xenvif_fix_features(struct net_device *dev,
 		features &= ~NETIF_F_SG;
 	if (!vif->gso && !vif->gso_prefix)
 		features &= ~NETIF_F_TSO;
-	if (!vif->csum)
+	if (!vif->ip_csum)
 		features &= ~NETIF_F_IP_CSUM;
+	if (!vif->ipv6_csum)
+		features &= ~NETIF_F_IPV6_CSUM;
 
 	return features;
 }
@@ -306,7 +308,7 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->domid  = domid;
 	vif->handle = handle;
 	vif->can_sg = 1;
-	vif->csum = 1;
+	vif->ip_csum = 1;
 	vif->dev = dev;
 
 	vif->credit_bytes = vif->remaining_credit = ~0UL;
@@ -316,7 +318,9 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->credit_timeout.expires = jiffies;
 
 	dev->netdev_ops	= &xenvif_netdev_ops;
-	dev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
+	dev->hw_features = NETIF_F_SG |
+		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+		NETIF_F_TSO;
 	dev->features = dev->hw_features;
 	SET_ETHTOOL_OPS(dev, &xenvif_ethtool_ops);
 
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index 1b08d87..ad27b15 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -574,7 +574,12 @@ static int connect_rings(struct backend_info *be)
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-no-csum-offload",
 			 "%d", &val) < 0)
 		val = 0;
-	vif->csum = !val;
+	vif->ip_csum = !val;
+
+	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-ipv6-csum-offload",
+			 "%d", &val) < 0)
+		val = 0;
+	vif->ipv6_csum = !!val;
 
 	/* Map the shared frame, irq etc. */
 	err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref,
diff --git a/include/xen/interface/io/netif.h b/include/xen/interface/io/netif.h
index eb262e3..c9e8184 100644
--- a/include/xen/interface/io/netif.h
+++ b/include/xen/interface/io/netif.h
@@ -51,6 +51,13 @@
  */
 
 /*
+ * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
+ * offload off or on. If it is missing then the feature is assumed to be on.
+ * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
+ * offload on or off. If it is missing then the feature is assumed to be off.
+ */
+
+/*
  * This is the 'wire' format for packets:
  *  Request 1: xen_netif_tx_request  -- XEN_NETTXF_* (any flags)
  * [Request 2: xen_netif_extra_info]    (only if request 1 has XEN_NETTXF_extra_info)
-- 
1.7.10.4

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox