Netdev List
 help / color / mirror / Atom feed
* [net-next.git 3/8] stmmac: start adding pcs and rgmii core irq
From: Giuseppe CAVALLARO @ 2013-03-26 14:43 UTC (permalink / raw)
  To: netdev; +Cc: rayagond, richardcochran, Giuseppe Cavallaro, Udit Kumar
In-Reply-To: <1364308992-27929-1-git-send-email-peppe.cavallaro@st.com>

This patch starts adding the management of the PCS and RGMII/SGMII
core interrupts to the main ISR. This is to help further development
in this area. Currently the core irq handler only clears the
PCS and SGMII/RGMII interrupts and reports the events in the ethtool stats.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Tested-by: Byungho An <bh74.an@samsung.com>
Cc: Udit Kumar <udit-dlh.kumar@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/common.h       |   25 ++++++-----
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |    5 +-
 .../net/ethernet/stmicro/stmmac/dwmac1000_core.c   |   44 ++++++++++++-------
 .../net/ethernet/stmicro/stmmac/dwmac100_core.c    |    3 +-
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |    4 ++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   24 ++---------
 6 files changed, 55 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 8a04b7f..479f479 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -140,6 +140,10 @@ struct stmmac_extra_stats {
 	unsigned long l3_filter_match;
 	unsigned long l4_filter_match;
 	unsigned long l3_l4_filter_no_match;
+	/* PCS */
+	unsigned long irq_pcs_ane_n;
+	unsigned long irq_pcs_link_n;
+	unsigned long irq_rgmii_n;
 };
 
 /* CSR Frequency Access Defines*/
@@ -217,16 +221,14 @@ enum dma_irq_status {
 	handle_tx = 0x8,
 };
 
-enum core_specific_irq_mask {
-	core_mmc_tx_irq = 1,
-	core_mmc_rx_irq = 2,
-	core_mmc_rx_csum_offload_irq = 4,
-	core_irq_receive_pmt_irq = 8,
-	core_irq_tx_path_in_lpi_mode = 16,
-	core_irq_tx_path_exit_lpi_mode = 32,
-	core_irq_rx_path_in_lpi_mode = 64,
-	core_irq_rx_path_exit_lpi_mode = 128,
-};
+#define	CORE_IRQ_TX_PATH_IN_LPI_MODE	(1 << 1)
+#define	CORE_IRQ_TX_PATH_EXIT_LPI_MODE	(1 << 2)
+#define	CORE_IRQ_RX_PATH_IN_LPI_MODE	(1 << 3)
+#define	CORE_IRQ_RX_PATH_EXIT_LPI_MODE	(1 << 4)
+
+#define	CORE_PCS_ANE_COMPLETE		(1 << 5)
+#define	CORE_PCS_LINK_STATUS		(1 << 6)
+#define	CORE_RGMII_IRQ			(1 << 7)
 
 /* DMA HW capabilities */
 struct dma_features {
@@ -355,7 +357,8 @@ struct stmmac_ops {
 	/* Dump MAC registers */
 	void (*dump_regs) (void __iomem *ioaddr);
 	/* Handle extra events on specific interrupts hw dependent */
-	int (*host_irq_status) (void __iomem *ioaddr);
+	int (*host_irq_status) (void __iomem *ioaddr,
+				struct stmmac_extra_stats *x);
 	/* Multicast filter setting */
 	void (*set_filter) (struct net_device *dev, int id);
 	/* Flow control setting */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 85466e5..6dd689e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -89,13 +89,14 @@ enum power_event {
 				(reg * 8))
 #define GMAC_MAX_PERFECT_ADDRESSES	32
 
+/* PCS registers (AN/TBI/SGMII/RGMII) offset */
 #define GMAC_AN_CTRL	0x000000c0	/* AN control */
 #define GMAC_AN_STATUS	0x000000c4	/* AN status */
 #define GMAC_ANE_ADV	0x000000c8	/* Auto-Neg. Advertisement */
-#define GMAC_ANE_LINK	0x000000cc	/* Auto-Neg. link partener ability */
+#define GMAC_ANE_LPA	0x000000cc	/* Auto-Neg. link partener ability */
 #define GMAC_ANE_EXP	0x000000d0	/* ANE expansion */
 #define GMAC_TBI	0x000000d4	/* TBI extend status */
-#define GMAC_GMII_STATUS 0x000000d8	/* S/R-GMII status */
+#define GMAC_S_R_GMII	0x000000d8	/* SGMII RGMII status */
 
 /* GMAC Configuration defines */
 #define GMAC_CONTROL_TC	0x01000000	/* Transmit Conf. in RGMII/SGMII */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index bfe0226..ff4c79e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -194,58 +194,70 @@ static void dwmac1000_pmt(void __iomem *ioaddr, unsigned long mode)
 }
 
 
-static int dwmac1000_irq_status(void __iomem *ioaddr)
+static int dwmac1000_irq_status(void __iomem *ioaddr,
+				struct stmmac_extra_stats *x)
 {
 	u32 intr_status = readl(ioaddr + GMAC_INT_STATUS);
-	int status = 0;
+	int ret = 0;
 
 	/* Not used events (e.g. MMC interrupts) are not handled. */
 	if ((intr_status & mmc_tx_irq)) {
 		CHIP_DBG(KERN_INFO "GMAC: MMC tx interrupt: 0x%08x\n",
 		    readl(ioaddr + GMAC_MMC_TX_INTR));
-		status |= core_mmc_tx_irq;
+		x->mmc_tx_irq_n++;
 	}
 	if (unlikely(intr_status & mmc_rx_irq)) {
 		CHIP_DBG(KERN_INFO "GMAC: MMC rx interrupt: 0x%08x\n",
 		    readl(ioaddr + GMAC_MMC_RX_INTR));
-		status |= core_mmc_rx_irq;
+		x->mmc_rx_irq_n++;
 	}
 	if (unlikely(intr_status & mmc_rx_csum_offload_irq)) {
 		CHIP_DBG(KERN_INFO "GMAC: MMC rx csum offload: 0x%08x\n",
 		    readl(ioaddr + GMAC_MMC_RX_CSUM_OFFLOAD));
-		status |= core_mmc_rx_csum_offload_irq;
+		x->mmc_rx_csum_offload_irq_n++;
 	}
 	if (unlikely(intr_status & pmt_irq)) {
 		CHIP_DBG(KERN_INFO "GMAC: received Magic frame\n");
 		/* clear the PMT bits 5 and 6 by reading the PMT
 		 * status register. */
 		readl(ioaddr + GMAC_PMT);
-		status |= core_irq_receive_pmt_irq;
+		x->irq_receive_pmt_irq_n++;
 	}
 	/* MAC trx/rx EEE LPI entry/exit interrupts */
 	if (intr_status & lpiis_irq) {
 		/* Clean LPI interrupt by reading the Reg 12 */
-		u32 lpi_status = readl(ioaddr + LPI_CTRL_STATUS);
+		ret = readl(ioaddr + LPI_CTRL_STATUS);
 
-		if (lpi_status & LPI_CTRL_STATUS_TLPIEN) {
+		if (ret & LPI_CTRL_STATUS_TLPIEN) {
 			CHIP_DBG(KERN_INFO "GMAC TX entered in LPI\n");
-			status |= core_irq_tx_path_in_lpi_mode;
+			x->irq_tx_path_in_lpi_mode_n++;
 		}
-		if (lpi_status & LPI_CTRL_STATUS_TLPIEX) {
+		if (ret & LPI_CTRL_STATUS_TLPIEX) {
 			CHIP_DBG(KERN_INFO "GMAC TX exit from LPI\n");
-			status |= core_irq_tx_path_exit_lpi_mode;
+			x->irq_tx_path_exit_lpi_mode_n++;
 		}
-		if (lpi_status & LPI_CTRL_STATUS_RLPIEN) {
+		if (ret & LPI_CTRL_STATUS_RLPIEN) {
 			CHIP_DBG(KERN_INFO "GMAC RX entered in LPI\n");
-			status |= core_irq_rx_path_in_lpi_mode;
+			x->irq_rx_path_in_lpi_mode_n++;
 		}
-		if (lpi_status & LPI_CTRL_STATUS_RLPIEX) {
+		if (ret & LPI_CTRL_STATUS_RLPIEX) {
 			CHIP_DBG(KERN_INFO "GMAC RX exit from LPI\n");
-			status |= core_irq_rx_path_exit_lpi_mode;
+			x->irq_rx_path_exit_lpi_mode_n++;
 		}
 	}
 
-	return status;
+	if ((intr_status & pcs_ane_irq) || (intr_status & pcs_link_irq)) {
+		CHIP_DBG(KERN_INFO "GMAC PCS ANE IRQ\n");
+		readl(ioaddr + GMAC_AN_STATUS);
+		x->irq_pcs_ane_n++;
+	}
+	if (intr_status & rgmii_irq) {
+		CHIP_DBG(KERN_INFO "GMAC RGMII IRQ status\n");
+		readl(ioaddr + GMAC_S_R_GMII);
+		x->irq_rgmii_n++;
+	}
+
+	return ret;
 }
 
 static void  dwmac1000_set_eee_mode(void __iomem *ioaddr)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index f83210e..cb86a58 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -72,7 +72,8 @@ static int dwmac100_rx_ipc_enable(void __iomem *ioaddr)
 	return 0;
 }
 
-static int dwmac100_irq_status(void __iomem *ioaddr)
+static int dwmac100_irq_status(void __iomem *ioaddr,
+			       struct stmmac_extra_stats *x)
 {
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index f6ad751..1793628 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -131,6 +131,10 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(l3_filter_match),
 	STMMAC_STAT(l4_filter_match),
 	STMMAC_STAT(l3_l4_filter_no_match),
+	/* PCS */
+	STMMAC_STAT(irq_pcs_ane_n),
+	STMMAC_STAT(irq_pcs_link_n),
+	STMMAC_STAT(irq_rgmii_n),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 96fbf86..ca3e95a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1780,30 +1780,14 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 	/* To handle GMAC own interrupts */
 	if (priv->plat->has_gmac) {
 		int status = priv->hw->mac->host_irq_status((void __iomem *)
-							    dev->base_addr);
+							    dev->base_addr,
+							    &priv->xstats);
 		if (unlikely(status)) {
-			if (status & core_mmc_tx_irq)
-				priv->xstats.mmc_tx_irq_n++;
-			if (status & core_mmc_rx_irq)
-				priv->xstats.mmc_rx_irq_n++;
-			if (status & core_mmc_rx_csum_offload_irq)
-				priv->xstats.mmc_rx_csum_offload_irq_n++;
-			if (status & core_irq_receive_pmt_irq)
-				priv->xstats.irq_receive_pmt_irq_n++;
-
 			/* For LPI we need to save the tx status */
-			if (status & core_irq_tx_path_in_lpi_mode) {
-				priv->xstats.irq_tx_path_in_lpi_mode_n++;
+			if (status & CORE_IRQ_TX_PATH_IN_LPI_MODE)
 				priv->tx_path_in_lpi_mode = true;
-			}
-			if (status & core_irq_tx_path_exit_lpi_mode) {
-				priv->xstats.irq_tx_path_exit_lpi_mode_n++;
+			if (status & CORE_IRQ_TX_PATH_EXIT_LPI_MODE)
 				priv->tx_path_in_lpi_mode = false;
-			}
-			if (status & core_irq_rx_path_in_lpi_mode)
-				priv->xstats.irq_rx_path_in_lpi_mode_n++;
-			if (status & core_irq_rx_path_exit_lpi_mode)
-				priv->xstats.irq_rx_path_exit_lpi_mode_n++;
 		}
 	}
 
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 2/8 (v2)] stmmac: support extend descriptors
From: Giuseppe CAVALLARO @ 2013-03-26 14:43 UTC (permalink / raw)
  To: netdev; +Cc: rayagond, richardcochran, Giuseppe Cavallaro
In-Reply-To: <1364308992-27929-1-git-send-email-peppe.cavallaro@st.com>

This patch adds support for the extended descriptors available
in chips newer than the 3.50.

If the extended descriptors cannot be supported, the driver will,
at runtime, continue to work using the old style.

In detail, this support extends the main descriptor structure by
adding new descriptors: 4, 5, 6, 7. The desc4 gives us extra
information about the received ethernet payload when it is
carrying PTP packets or TCP/UDP/ICMP over IP packets.
The descriptors 6 and 7 are used for saving the low and high words
of the HW timestamps (PTP).

V2: this new version removes the Kconfig option added in the first
implementation because all the checks to verify whether the extended
descriptors are actually supported now happen at probe time.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/chain_mode.c   |   29 +-
 drivers/net/ethernet/stmicro/stmmac/common.h       |   38 ++-
 drivers/net/ethernet/stmicro/stmmac/descs.h        |   51 +++-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |    1 +
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |    8 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c |    4 +-
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c     |   97 +++--
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c    |   45 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    7 +-
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |   23 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  443 ++++++++++++++------
 11 files changed, 528 insertions(+), 218 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index 08ff51e..688c3f4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -89,27 +89,38 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
 	return ret;
 }
 
-static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr,
-				  unsigned int size)
+static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
+				  unsigned int size, unsigned int extend_desc)
 {
 	/*
 	 * In chained mode the des3 points to the next element in the ring.
 	 * The latest element has to point to the head.
 	 */
 	int i;
-	struct dma_desc *p = des;
 	dma_addr_t dma_phy = phy_addr;
 
-	for (i = 0; i < (size - 1); i++) {
-		dma_phy += sizeof(struct dma_desc);
-		p->des3 = (unsigned int)dma_phy;
-		p++;
+	if (extend_desc) {
+		struct dma_extended_desc *p = (struct dma_extended_desc *) des;
+		for (i = 0; i < (size - 1); i++) {
+			dma_phy += sizeof(struct dma_extended_desc);
+			p->basic.des3 = (unsigned int)dma_phy;
+			p++;
+		}
+		p->basic.des3 = (unsigned int)phy_addr;
+
+	} else {
+		struct dma_desc *p = (struct dma_desc *) des;
+		for (i = 0; i < (size - 1); i++) {
+			dma_phy += sizeof(struct dma_desc);
+			p->des3 = (unsigned int)dma_phy;
+			p++;
+		}
+		p->des3 = (unsigned int)phy_addr;
 	}
-	p->des3 = (unsigned int)phy_addr;
 }
 
 const struct stmmac_chain_mode_ops chain_mode_ops = {
+	.init = stmmac_init_dma_chain,
 	.is_jumbo_frm = stmmac_is_jumbo_frm,
 	.jumbo_frm = stmmac_jumbo_frm,
-	.init_dma_chain = stmmac_init_dma_chain,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index a295532..8a04b7f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -117,6 +117,29 @@ struct stmmac_extra_stats {
 	unsigned long irq_rx_path_in_lpi_mode_n;
 	unsigned long irq_rx_path_exit_lpi_mode_n;
 	unsigned long phy_eee_wakeup_error_n;
+	/* Extended RDES status */
+	unsigned long ip_hdr_err;
+	unsigned long ip_payload_err;
+	unsigned long ip_csum_bypassed;
+	unsigned long ipv4_pkt_rcvd;
+	unsigned long ipv6_pkt_rcvd;
+	unsigned long rx_msg_type_ext_no_ptp;
+	unsigned long rx_msg_type_sync;
+	unsigned long rx_msg_type_follow_up;
+	unsigned long rx_msg_type_delay_req;
+	unsigned long rx_msg_type_delay_resp;
+	unsigned long rx_msg_type_pdelay_req;
+	unsigned long rx_msg_type_pdelay_resp;
+	unsigned long rx_msg_type_pdelay_follow_up;
+	unsigned long ptp_frame_type;
+	unsigned long ptp_ver;
+	unsigned long timestamp_dropped;
+	unsigned long av_pkt_rcvd;
+	unsigned long av_tagged_pkt_rcvd;
+	unsigned long vlan_tag_priority_val;
+	unsigned long l3_filter_match;
+	unsigned long l4_filter_match;
+	unsigned long l3_l4_filter_no_match;
 };
 
 /* CSR Frequency Access Defines*/
@@ -260,11 +283,10 @@ struct dma_features {
 
 struct stmmac_desc_ops {
 	/* DMA RX descriptor ring initialization */
-	void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size,
-			      int disable_rx_ic, int mode);
+	void (*init_rx_desc) (struct dma_desc *p, int disable_rx_ic, int mode,
+			      int end);
 	/* DMA TX descriptor ring initialization */
-	void (*init_tx_desc) (struct dma_desc *p, unsigned int ring_size,
-			      int mode);
+	void (*init_tx_desc) (struct dma_desc *p, int mode, int end);
 
 	/* Invoked by the xmit function to prepare the tx descriptor */
 	void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len,
@@ -294,12 +316,14 @@ struct stmmac_desc_ops {
 	/* Return the reception status looking at the RDES1 */
 	int (*rx_status) (void *data, struct stmmac_extra_stats *x,
 			  struct dma_desc *p);
+	void (*rx_extended_status) (void *data, struct stmmac_extra_stats *x,
+				    struct dma_extended_desc *p);
 };
 
 struct stmmac_dma_ops {
 	/* DMA core initialization */
 	int (*init) (void __iomem *ioaddr, int pbl, int fb, int mb,
-		     int burst_len, u32 dma_tx, u32 dma_rx);
+		     int burst_len, u32 dma_tx, u32 dma_rx, int atds);
 	/* Dump DMA registers */
 	void (*dump_regs) (void __iomem *ioaddr);
 	/* Set tx/rx threshold in the csr6 register
@@ -371,10 +395,10 @@ struct stmmac_ring_mode_ops {
 };
 
 struct stmmac_chain_mode_ops {
+	void (*init) (void *des, dma_addr_t phy_addr, unsigned int size,
+		      unsigned int extend_desc);
 	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
 	unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum);
-	void (*init_dma_chain) (struct dma_desc *des, dma_addr_t phy_addr,
-				unsigned int size);
 };
 
 struct mac_device_info {
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h
index 223adf9..2eca0c0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
@@ -24,6 +24,7 @@
 #ifndef __DESCS_H__
 #define __DESCS_H__
 
+/* Basic descriptor structure for normal and alternate descriptors */
 struct dma_desc {
 	/* Receive descriptor */
 	union {
@@ -60,7 +61,7 @@ struct dma_desc {
 		} rx;
 		struct {
 			/* RDES0 */
-			u32 payload_csum_error:1;
+			u32 rx_mac_addr:1;
 			u32 crc_error:1;
 			u32 dribbling:1;
 			u32 error_gmii:1;
@@ -162,13 +163,57 @@ struct dma_desc {
 	unsigned int des3;
 };
 
+/* Extended descriptor structure (supported by new SYNP GMAC generations) */
+struct dma_extended_desc {
+	struct dma_desc basic;
+	union {
+		struct {
+			u32 ip_payload_type:3;
+			u32 ip_hdr_err:1;
+			u32 ip_payload_err:1;
+			u32 ip_csum_bypassed:1;
+			u32 ipv4_pkt_rcvd:1;
+			u32 ipv6_pkt_rcvd:1;
+			u32 msg_type:4;
+			u32 ptp_frame_type:1;
+			u32 ptp_ver:1;
+			u32 timestamp_dropped:1;
+			u32 reserved:1;
+			u32 av_pkt_rcvd:1;
+			u32 av_tagged_pkt_rcvd:1;
+			u32 vlan_tag_priority_val:3;
+			u32 reserved3:3;
+			u32 l3_filter_match:1;
+			u32 l4_filter_match:1;
+			u32 l3_l4_filter_no_match:2;
+			u32 reserved4:4;
+		} erx;
+		struct {
+			u32 reserved;
+		} etx;
+	} des4;
+	unsigned int des5; /* Reserved */
+	unsigned int des6; /* Tx/Rx Timestamp Low */
+	unsigned int des7; /* Tx/Rx Timestamp High */
+};
+
 /* Transmit checksum insertion control */
 enum tdes_csum_insertion {
 	cic_disabled = 0,	/* Checksum Insertion Control */
 	cic_only_ip = 1,	/* Only IP header */
-	cic_no_pseudoheader = 2,	/* IP header but pseudoheader
-					 * is not calculated */
+	/* IP header but pseudoheader is not calculated */
+	cic_no_pseudoheader = 2,
 	cic_full = 3,		/* IP header and pseudoheader */
 };
 
+/* Extended RDES4 definitions */
+#define RDES_EXT_NO_PTP			0
+#define RDES_EXT_SYNC			0x1
+#define RDES_EXT_FOLLOW_UP		0x2
+#define RDES_EXT_DELAY_REQ		0x3
+#define RDES_EXT_DELAY_RESP		0x4
+#define RDES_EXT_PDELAY_REQ		0x5
+#define RDES_EXT_PDELAY_RESP		0x6
+#define RDES_EXT_PDELAY_FOLLOW_UP	0x7
+
 #endif /* __DESCS_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index 7ad56af..85466e5 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -155,6 +155,7 @@ enum inter_frame_gap {
 /* Programmable burst length (passed thorugh platform)*/
 #define DMA_BUS_MODE_PBL_MASK	0x00003f00	/* Programmable Burst Len */
 #define DMA_BUS_MODE_PBL_SHIFT	8
+#define DMA_BUS_MODE_ATDS	0x00000080	/* Alternate Descriptor Size */
 
 enum rx_tx_priority_ratio {
 	double_ratio = 0x00004000,	/*2:1 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index bf83c03..f1c4b2c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -30,8 +30,8 @@
 #include "dwmac1000.h"
 #include "dwmac_dma.h"
 
-static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb,
-			      int mb, int burst_len, u32 dma_tx, u32 dma_rx)
+static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
+			      int burst_len, u32 dma_tx, u32 dma_rx, int atds)
 {
 	u32 value = readl(ioaddr + DMA_BUS_MODE);
 	int limit;
@@ -73,6 +73,10 @@ static int dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb,
 #ifdef CONFIG_STMMAC_DA
 	value |= DMA_BUS_MODE_DA;	/* Rx has priority over tx */
 #endif
+
+	if (atds)
+		value |= DMA_BUS_MODE_ATDS;
+
 	writel(value, ioaddr + DMA_BUS_MODE);
 
 	/* In case of GMAC AXI configuration, program the DMA_AXI_BUS_MODE
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index c2b4d55..e979a8b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -32,8 +32,8 @@
 #include "dwmac100.h"
 #include "dwmac_dma.h"
 
-static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb,
-			     int mb, int burst_len, u32 dma_tx, u32 dma_rx)
+static int dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
+			     int burst_len, u32 dma_tx, u32 dma_rx, int atds)
 {
 	u32 value = readl(ioaddr + DMA_BUS_MODE);
 	int limit;
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 62f9f4e..c1b9ab2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -150,6 +150,57 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
 	return ret;
 }
 
+static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
+				    struct dma_extended_desc *p)
+{
+	if (unlikely(p->basic.des01.erx.rx_mac_addr)) {
+		if (p->des4.erx.ip_hdr_err)
+			x->ip_hdr_err++;
+		if (p->des4.erx.ip_payload_err)
+			x->ip_payload_err++;
+		if (p->des4.erx.ip_csum_bypassed)
+			x->ip_csum_bypassed++;
+		if (p->des4.erx.ipv4_pkt_rcvd)
+			x->ipv4_pkt_rcvd++;
+		if (p->des4.erx.ipv6_pkt_rcvd)
+			x->ipv6_pkt_rcvd++;
+		if (p->des4.erx.msg_type == RDES_EXT_SYNC)
+			x->rx_msg_type_sync++;
+		else if (p->des4.erx.msg_type == RDES_EXT_FOLLOW_UP)
+			x->rx_msg_type_follow_up++;
+		else if (p->des4.erx.msg_type == RDES_EXT_DELAY_REQ)
+			x->rx_msg_type_delay_req++;
+		else if (p->des4.erx.msg_type == RDES_EXT_DELAY_RESP)
+			x->rx_msg_type_delay_resp++;
+		else if (p->des4.erx.msg_type == RDES_EXT_DELAY_REQ)
+			x->rx_msg_type_pdelay_req++;
+		else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_RESP)
+			x->rx_msg_type_pdelay_resp++;
+		else if (p->des4.erx.msg_type == RDES_EXT_PDELAY_FOLLOW_UP)
+			x->rx_msg_type_pdelay_follow_up++;
+		else
+			x->rx_msg_type_ext_no_ptp++;
+		if (p->des4.erx.ptp_frame_type)
+			x->ptp_frame_type++;
+		if (p->des4.erx.ptp_ver)
+			x->ptp_ver++;
+		if (p->des4.erx.timestamp_dropped)
+			x->timestamp_dropped++;
+		if (p->des4.erx.av_pkt_rcvd)
+			x->av_pkt_rcvd++;
+		if (p->des4.erx.av_tagged_pkt_rcvd)
+			x->av_tagged_pkt_rcvd++;
+		if (p->des4.erx.vlan_tag_priority_val)
+			x->vlan_tag_priority_val++;
+		if (p->des4.erx.l3_filter_match)
+			x->l3_filter_match++;
+		if (p->des4.erx.l4_filter_match)
+			x->l4_filter_match++;
+		if (p->des4.erx.l3_l4_filter_no_match)
+			x->l3_l4_filter_no_match++;
+	}
+}
+
 static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 				  struct dma_desc *p)
 {
@@ -198,7 +249,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 	 * At any rate, we need to understand if the CSUM hw computation is ok
 	 * and report this info to the upper layers. */
 	ret = enh_desc_coe_rdes0(p->des01.erx.ipc_csum_error,
-		p->des01.erx.frame_type, p->des01.erx.payload_csum_error);
+		p->des01.erx.frame_type, p->des01.erx.rx_mac_addr);
 
 	if (unlikely(p->des01.erx.dribbling)) {
 		CHIP_DBG(KERN_ERR "GMAC RX: dribbling error\n");
@@ -225,41 +276,32 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 		x->rx_vlan++;
 	}
 #endif
+
 	return ret;
 }
 
-static void enh_desc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
-				  int disable_rx_ic, int mode)
+static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
+				  int mode, int end)
 {
-	int i;
-	for (i = 0; i < ring_size; i++) {
-		p->des01.erx.own = 1;
-		p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1;
+	p->des01.erx.own = 1;
+	p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1;
 
-		if (mode == STMMAC_CHAIN_MODE)
-			ehn_desc_rx_set_on_chain(p, (i == ring_size - 1));
-		else
-			ehn_desc_rx_set_on_ring(p, (i == ring_size - 1));
+	if (mode == STMMAC_CHAIN_MODE)
+		ehn_desc_rx_set_on_chain(p, end);
+	else
+		ehn_desc_rx_set_on_ring(p, end);
 
-		if (disable_rx_ic)
-			p->des01.erx.disable_ic = 1;
-		p++;
-	}
+	if (disable_rx_ic)
+		p->des01.erx.disable_ic = 1;
 }
 
-static void enh_desc_init_tx_desc(struct dma_desc *p, unsigned int ring_size,
-				  int mode)
+static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-	int i;
-
-	for (i = 0; i < ring_size; i++) {
-		p->des01.etx.own = 0;
-		if (mode == STMMAC_CHAIN_MODE)
-			ehn_desc_tx_set_on_chain(p, (i == ring_size - 1));
-		else
-			ehn_desc_tx_set_on_ring(p, (i == ring_size - 1));
-		p++;
-	}
+	p->des01.etx.own = 0;
+	if (mode == STMMAC_CHAIN_MODE)
+		ehn_desc_tx_set_on_chain(p, end);
+	else
+		ehn_desc_tx_set_on_ring(p, end);
 }
 
 static int enh_desc_get_tx_owner(struct dma_desc *p)
@@ -352,4 +394,5 @@ const struct stmmac_desc_ops enh_desc_ops = {
 	.set_tx_owner = enh_desc_set_tx_owner,
 	.set_rx_owner = enh_desc_set_rx_owner,
 	.get_rx_frame_len = enh_desc_get_rx_frame_len,
+	.rx_extended_status = enh_desc_get_ext_status,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 88df0b4..47d5094 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -122,37 +122,28 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 	return ret;
 }
 
-static void ndesc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
-			       int disable_rx_ic, int mode)
+static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
+			       int end)
 {
-	int i;
-	for (i = 0; i < ring_size; i++) {
-		p->des01.rx.own = 1;
-		p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1;
-
-		if (mode == STMMAC_CHAIN_MODE)
-			ndesc_rx_set_on_chain(p, (i == ring_size - 1));
-		else
-			ndesc_rx_set_on_ring(p, (i == ring_size - 1));
-
-		if (disable_rx_ic)
-			p->des01.rx.disable_ic = 1;
-		p++;
-	}
+	p->des01.rx.own = 1;
+	p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1;
+
+	if (mode == STMMAC_CHAIN_MODE)
+		ndesc_rx_set_on_chain(p, end);
+	else
+		ndesc_rx_set_on_ring(p, end);
+
+	if (disable_rx_ic)
+		p->des01.rx.disable_ic = 1;
 }
 
-static void ndesc_init_tx_desc(struct dma_desc *p, unsigned int ring_size,
-			       int mode)
+static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-	int i;
-	for (i = 0; i < ring_size; i++) {
-		p->des01.tx.own = 0;
-		if (mode == STMMAC_CHAIN_MODE)
-			ndesc_tx_set_on_chain(p, (i == (ring_size - 1)));
-		else
-			ndesc_tx_set_on_ring(p, (i == (ring_size - 1)));
-		p++;
-	}
+	p->des01.tx.own = 0;
+	if (mode == STMMAC_CHAIN_MODE)
+		ndesc_tx_set_on_chain(p, end);
+	else
+		ndesc_tx_set_on_ring(p, end);
 }
 
 static int ndesc_get_tx_owner(struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index e5f2f33..9637d3e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -34,7 +34,8 @@
 
 struct stmmac_priv {
 	/* Frequently used values are kept adjacent for cache effect */
-	struct dma_desc *dma_tx ____cacheline_aligned;
+	struct dma_desc *dma_tx ____cacheline_aligned;	/* Basic TX desc */
+	struct dma_extended_desc *dma_etx;	/* Extended TX descriptor */
 	dma_addr_t dma_tx_phy;
 	struct sk_buff **tx_skbuff;
 	unsigned int cur_tx;
@@ -42,7 +43,8 @@ struct stmmac_priv {
 	unsigned int dma_tx_size;
 	int tx_coalesce;
 
-	struct dma_desc *dma_rx ;
+	struct dma_desc *dma_rx;		/* Basic RX descriptor */
+	struct dma_extended_desc *dma_erx;	/* Extended RX descriptor */
 	unsigned int cur_rx;
 	unsigned int dirty_rx;
 	struct sk_buff **rx_skbuff;
@@ -94,6 +96,7 @@ struct stmmac_priv {
 	int use_riwt;
 	u32 rx_riwt;
 	unsigned int mode;
+	int extend_desc;
 };
 
 extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index d1ac39c..f6ad751 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -108,6 +108,29 @@ static const struct stmmac_stats stmmac_gstrings_stats[] = {
 	STMMAC_STAT(irq_rx_path_in_lpi_mode_n),
 	STMMAC_STAT(irq_rx_path_exit_lpi_mode_n),
 	STMMAC_STAT(phy_eee_wakeup_error_n),
+	/* Extended RDES status */
+	STMMAC_STAT(ip_hdr_err),
+	STMMAC_STAT(ip_payload_err),
+	STMMAC_STAT(ip_csum_bypassed),
+	STMMAC_STAT(ipv4_pkt_rcvd),
+	STMMAC_STAT(ipv6_pkt_rcvd),
+	STMMAC_STAT(rx_msg_type_ext_no_ptp),
+	STMMAC_STAT(rx_msg_type_sync),
+	STMMAC_STAT(rx_msg_type_follow_up),
+	STMMAC_STAT(rx_msg_type_delay_req),
+	STMMAC_STAT(rx_msg_type_delay_resp),
+	STMMAC_STAT(rx_msg_type_pdelay_req),
+	STMMAC_STAT(rx_msg_type_pdelay_resp),
+	STMMAC_STAT(rx_msg_type_pdelay_follow_up),
+	STMMAC_STAT(ptp_frame_type),
+	STMMAC_STAT(ptp_ver),
+	STMMAC_STAT(timestamp_dropped),
+	STMMAC_STAT(av_pkt_rcvd),
+	STMMAC_STAT(av_tagged_pkt_rcvd),
+	STMMAC_STAT(vlan_tag_priority_val),
+	STMMAC_STAT(l3_filter_match),
+	STMMAC_STAT(l4_filter_match),
+	STMMAC_STAT(l3_l4_filter_no_match),
 };
 #define STMMAC_STATS_LEN ARRAY_SIZE(stmmac_gstrings_stats)
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index bbee6b3..96fbf86 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -468,29 +468,56 @@ static int stmmac_init_phy(struct net_device *dev)
 }
 
 /**
- * display_ring
+ * stmmac_display_ring
  * @p: pointer to the ring.
  * @size: size of the ring.
- * Description: display all the descriptors within the ring.
+ * Description: display the control/status and buffer descriptors.
  */
-static void display_ring(struct dma_desc *p, int size)
+static void stmmac_display_ring(void *head, int size, int extend_desc)
 {
-	struct tmp_s {
-		u64 a;
-		unsigned int b;
-		unsigned int c;
-	};
 	int i;
+	struct dma_extended_desc *ep = (struct dma_extended_desc *) head;
+	struct dma_desc *p = (struct dma_desc *) head;
+
 	for (i = 0; i < size; i++) {
-		struct tmp_s *x = (struct tmp_s *)(p + i);
-		pr_info("\t%d [0x%x]: DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x",
-		       i, (unsigned int)virt_to_phys(&p[i]),
-		       (unsigned int)(x->a), (unsigned int)((x->a) >> 32),
-		       x->b, x->c);
+		u64 x;
+		if (extend_desc) {
+			x = *(u64 *) ep;
+			pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
+				i, (unsigned int) virt_to_phys(ep),
+				(unsigned int) x, (unsigned int) (x >> 32),
+				ep->basic.des2, ep->basic.des3);
+			ep++;
+		} else {
+			x = *(u64 *) p;
+			pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x",
+				i, (unsigned int) virt_to_phys(p),
+				(unsigned int) x, (unsigned int) (x >> 32),
+				p->des2, p->des3);
+			p++;
+		}
 		pr_info("\n");
 	}
 }
 
+static void stmmac_display_rings(struct stmmac_priv *priv)
+{
+	unsigned int txsize = priv->dma_tx_size;
+	unsigned int rxsize = priv->dma_rx_size;
+
+	if (priv->extend_desc) {
+		pr_info("Extended RX descriptor ring:\n");
+		stmmac_display_ring((void *) priv->dma_erx, rxsize, 1);
+		pr_info("Extended TX descriptor ring:\n");
+		stmmac_display_ring((void *) priv->dma_etx, txsize, 1);
+	} else {
+		pr_info("RX descriptor ring:\n");
+		stmmac_display_ring((void *)priv->dma_rx, rxsize, 0);
+		pr_info("TX descriptor ring:\n");
+		stmmac_display_ring((void *)priv->dma_tx, txsize, 0);
+	}
+}
+
 static int stmmac_set_bfsize(int mtu, int bufsize)
 {
 	int ret = bufsize;
@@ -507,6 +534,59 @@ static int stmmac_set_bfsize(int mtu, int bufsize)
 	return ret;
 }
 
+static void stmmac_clear_descriptors(struct stmmac_priv *priv)
+{
+	int i;
+	unsigned int txsize = priv->dma_tx_size;
+	unsigned int rxsize = priv->dma_rx_size;
+
+	/* Clear the Rx/Tx descriptors */
+	for (i = 0; i < rxsize; i++)
+		if (priv->extend_desc)
+			priv->hw->desc->init_rx_desc(&priv->dma_erx[i].basic,
+						     priv->use_riwt, priv->mode,
+						     (i == rxsize - 1));
+		else
+			priv->hw->desc->init_rx_desc(&priv->dma_rx[i],
+						     priv->use_riwt, priv->mode,
+						     (i == rxsize - 1));
+	for (i = 0; i < txsize; i++)
+		if (priv->extend_desc)
+			priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+						     priv->mode,
+						     (i == txsize - 1));
+		else
+			priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+						     priv->mode,
+						     (i == txsize - 1));
+}
+
+static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
+				  int i)
+{
+	struct sk_buff *skb;
+
+	skb = __netdev_alloc_skb(priv->dev, priv->dma_buf_sz + NET_IP_ALIGN,
+				 GFP_KERNEL);
+	if (unlikely(skb == NULL)) {
+		pr_err("%s: Rx init fails; skb is NULL\n", __func__);
+		return 1;
+	}
+	skb_reserve(skb, NET_IP_ALIGN);
+	priv->rx_skbuff[i] = skb;
+	priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
+						priv->dma_buf_sz,
+						DMA_FROM_DEVICE);
+
+	p->des2 = priv->rx_skbuff_dma[i];
+
+	if ((priv->mode == STMMAC_RING_MODE) &&
+	    (priv->dma_buf_sz == BUF_SIZE_16KiB))
+		priv->hw->ring->init_desc3(p);
+
+	return 0;
+}
+
 /**
  * init_dma_desc_rings - init the RX/TX descriptor rings
  * @dev: net device structure
@@ -518,11 +598,9 @@ static void init_dma_desc_rings(struct net_device *dev)
 {
 	int i;
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct sk_buff *skb;
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int rxsize = priv->dma_rx_size;
 	unsigned int bfsize = 0;
-	int dis_ic = 0;
 
 	/* Set the max buffer size according to the DESC mode
 	 * and the MTU. Note that RING mode allows 16KiB bsize. */
@@ -535,50 +613,53 @@ static void init_dma_desc_rings(struct net_device *dev)
 	DBG(probe, INFO, "stmmac: txsize %d, rxsize %d, bfsize %d\n",
 	    txsize, rxsize, bfsize);
 
+	if (priv->extend_desc) {
+		priv->dma_erx = dma_alloc_coherent(priv->device, rxsize *
+						   sizeof(struct
+							  dma_extended_desc),
+						   &priv->dma_rx_phy,
+						   GFP_KERNEL);
+		priv->dma_etx = dma_alloc_coherent(priv->device, txsize *
+						   sizeof(struct
+							  dma_extended_desc),
+						   &priv->dma_tx_phy,
+						   GFP_KERNEL);
+		if ((!priv->dma_erx) || (!priv->dma_etx))
+			return;
+	} else {
+		priv->dma_rx = dma_alloc_coherent(priv->device, rxsize *
+						  sizeof(struct dma_desc),
+						  &priv->dma_rx_phy,
+						  GFP_KERNEL);
+		priv->dma_tx = dma_alloc_coherent(priv->device, txsize *
+						  sizeof(struct dma_desc),
+						  &priv->dma_tx_phy,
+						  GFP_KERNEL);
+		if ((!priv->dma_rx) || (!priv->dma_tx))
+			return;
+	}
+
 	priv->rx_skbuff_dma = kmalloc_array(rxsize, sizeof(dma_addr_t),
 					    GFP_KERNEL);
 	priv->rx_skbuff = kmalloc_array(rxsize, sizeof(struct sk_buff *),
 					GFP_KERNEL);
-	priv->dma_rx = dma_alloc_coherent(priv->device,
-					  rxsize * sizeof(struct dma_desc),
-					  &priv->dma_rx_phy, GFP_KERNEL);
 	priv->tx_skbuff = kmalloc_array(txsize, sizeof(struct sk_buff *),
 					GFP_KERNEL);
-	priv->dma_tx = dma_alloc_coherent(priv->device,
-					  txsize * sizeof(struct dma_desc),
-					  &priv->dma_tx_phy, GFP_KERNEL);
-
-	if ((priv->dma_rx == NULL) || (priv->dma_tx == NULL))
-		return;
-
-	DBG(probe, INFO, "stmmac (%s) DMA desc: virt addr (Rx %p, "
-	    "Tx %p)\n\tDMA phy addr (Rx 0x%08x, Tx 0x%08x)\n",
-	    dev->name, priv->dma_rx, priv->dma_tx,
-	    (unsigned int)priv->dma_rx_phy, (unsigned int)priv->dma_tx_phy);
+	if (netif_msg_drv(priv))
+		pr_debug("(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n", __func__,
+			 (u32) priv->dma_rx_phy, (u32) priv->dma_tx_phy);
 
 	/* RX INITIALIZATION */
-	DBG(probe, INFO, "stmmac: SKB addresses:\n"
-			 "skb\t\tskb data\tdma data\n");
-
+	DBG(probe, INFO, "stmmac: SKB addresses:\nskb\t\tskb data\tdma data\n");
 	for (i = 0; i < rxsize; i++) {
-		struct dma_desc *p = priv->dma_rx + i;
+		struct dma_desc *p;
+		if (priv->extend_desc)
+			p = &((priv->dma_erx + i)->basic);
+		else
+			p = priv->dma_rx + i;
 
-		skb = __netdev_alloc_skb(dev, bfsize + NET_IP_ALIGN,
-					 GFP_KERNEL);
-		if (unlikely(skb == NULL)) {
-			pr_err("%s: Rx init fails; skb is NULL\n", __func__);
+		if (stmmac_init_rx_buffers(priv, p, i))
 			break;
-		}
-		skb_reserve(skb, NET_IP_ALIGN);
-		priv->rx_skbuff[i] = skb;
-		priv->rx_skbuff_dma[i] = dma_map_single(priv->device, skb->data,
-						bfsize, DMA_FROM_DEVICE);
-
-		p->des2 = priv->rx_skbuff_dma[i];
-
-		if ((priv->mode == STMMAC_RING_MODE) &&
-		    (bfsize == BUF_SIZE_16KiB))
-			priv->hw->ring->init_desc3(p);
 
 		DBG(probe, INFO, "[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i],
 			priv->rx_skbuff[i]->data, priv->rx_skbuff_dma[i]);
@@ -588,35 +669,39 @@ static void init_dma_desc_rings(struct net_device *dev)
 	priv->dma_buf_sz = bfsize;
 	buf_sz = bfsize;
 
+	/* Setup the chained descriptor addresses */
+	if (priv->mode == STMMAC_CHAIN_MODE) {
+		if (priv->extend_desc) {
+			priv->hw->chain->init(priv->dma_erx, priv->dma_rx_phy,
+					      rxsize, 1);
+			priv->hw->chain->init(priv->dma_etx, priv->dma_tx_phy,
+					      txsize, 1);
+		} else {
+			priv->hw->chain->init(priv->dma_rx, priv->dma_rx_phy,
+					      rxsize, 0);
+			priv->hw->chain->init(priv->dma_tx, priv->dma_tx_phy,
+					      txsize, 0);
+		}
+	}
+
 	/* TX INITIALIZATION */
 	for (i = 0; i < txsize; i++) {
+		struct dma_desc *p;
+		if (priv->extend_desc)
+			p = &((priv->dma_etx + i)->basic);
+		else
+			p = priv->dma_tx + i;
+		p->des2 = 0;
 		priv->tx_skbuff[i] = NULL;
-		priv->dma_tx[i].des2 = 0;
 	}
 
-	/* In case of Chained mode this sets the des3 to the next
-	 * element in the chain */
-	if (priv->mode == STMMAC_CHAIN_MODE) {
-		priv->hw->chain->init_dma_chain(priv->dma_rx, priv->dma_rx_phy,
-						rxsize);
-		priv->hw->chain->init_dma_chain(priv->dma_tx, priv->dma_tx_phy,
-						txsize);
-	}
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 
-	if (priv->use_riwt)
-		dis_ic = 1;
-	/* Clear the Rx/Tx descriptors */
-	priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic, priv->mode);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, txsize, priv->mode);
+	stmmac_clear_descriptors(priv);
 
-	if (netif_msg_hw(priv)) {
-		pr_info("RX descriptor ring:\n");
-		display_ring(priv->dma_rx, rxsize);
-		pr_info("TX descriptor ring:\n");
-		display_ring(priv->dma_tx, txsize);
-	}
+	if (netif_msg_hw(priv))
+		stmmac_display_rings(priv);
 }
 
 static void dma_free_rx_skbufs(struct stmmac_priv *priv)
@@ -639,7 +724,12 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv)
 
 	for (i = 0; i < priv->dma_tx_size; i++) {
 		if (priv->tx_skbuff[i] != NULL) {
-			struct dma_desc *p = priv->dma_tx + i;
+			struct dma_desc *p;
+			if (priv->extend_desc)
+				p = &((priv->dma_etx + i)->basic);
+			else
+				p = priv->dma_tx + i;
+
 			if (p->des2)
 				dma_unmap_single(priv->device, p->des2,
 						 priv->hw->desc->get_tx_len(p),
@@ -658,12 +748,21 @@ static void free_dma_desc_resources(struct stmmac_priv *priv)
 
 	/* Free the region of consistent memory previously allocated for
 	 * the DMA */
-	dma_free_coherent(priv->device,
-			  priv->dma_tx_size * sizeof(struct dma_desc),
-			  priv->dma_tx, priv->dma_tx_phy);
-	dma_free_coherent(priv->device,
-			  priv->dma_rx_size * sizeof(struct dma_desc),
-			  priv->dma_rx, priv->dma_rx_phy);
+	if (!priv->extend_desc) {
+		dma_free_coherent(priv->device,
+				  priv->dma_tx_size * sizeof(struct dma_desc),
+				  priv->dma_tx, priv->dma_tx_phy);
+		dma_free_coherent(priv->device,
+				  priv->dma_rx_size * sizeof(struct dma_desc),
+				  priv->dma_rx, priv->dma_rx_phy);
+	} else {
+		dma_free_coherent(priv->device, priv->dma_tx_size *
+				  sizeof(struct dma_extended_desc),
+				  priv->dma_etx, priv->dma_tx_phy);
+		dma_free_coherent(priv->device, priv->dma_rx_size *
+				  sizeof(struct dma_extended_desc),
+				  priv->dma_erx, priv->dma_rx_phy);
+	}
 	kfree(priv->rx_skbuff_dma);
 	kfree(priv->rx_skbuff);
 	kfree(priv->tx_skbuff);
@@ -710,13 +809,18 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 		int last;
 		unsigned int entry = priv->dirty_tx % txsize;
 		struct sk_buff *skb = priv->tx_skbuff[entry];
-		struct dma_desc *p = priv->dma_tx + entry;
+		struct dma_desc *p;
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *) (priv->dma_etx + entry);
+		else
+			p = priv->dma_tx + entry;
 
 		/* Check if the descriptor is owned by the DMA. */
 		if (priv->hw->desc->get_tx_owner(p))
 			break;
 
-		/* Verify tx error by looking at the last segment */
+		/* Verify tx error by looking at the last segment. */
 		last = priv->hw->desc->get_tx_ls(p);
 		if (likely(last)) {
 			int tx_error =
@@ -785,12 +889,21 @@ static inline void stmmac_disable_dma_irq(struct stmmac_priv *priv)
  */
 static void stmmac_tx_err(struct stmmac_priv *priv)
 {
+	int i;
+	int txsize = priv->dma_tx_size;
 	netif_stop_queue(priv->dev);
 
 	priv->hw->dma->stop_tx(priv->ioaddr);
 	dma_free_tx_skbufs(priv);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size,
-				     priv->mode);
+	for (i = 0; i < txsize; i++)
+		if (priv->extend_desc)
+			priv->hw->desc->init_tx_desc(&priv->dma_etx[i].basic,
+						     priv->mode,
+						     (i == txsize - 1));
+		else
+			priv->hw->desc->init_tx_desc(&priv->dma_tx[i],
+						     priv->mode,
+						     (i == txsize - 1));
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 	priv->hw->dma->start_tx(priv->ioaddr);
@@ -864,6 +977,14 @@ static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
 {
 	if (priv->plat->enh_desc) {
 		pr_info(" Enhanced/Alternate descriptors\n");
+
+		/* GMAC older than 3.50 has no extended descriptors */
+		if (priv->synopsys_id >= DWMAC_CORE_3_50) {
+			pr_info("\tEnabled extended descriptors\n");
+			priv->extend_desc = 1;
+		} else
+			pr_warn("Extended descriptors not supported\n");
+
 		priv->hw->desc = &enh_desc_ops;
 	} else {
 		pr_info(" Normal descriptors\n");
@@ -950,6 +1071,7 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
 	int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, burst_len = 0;
 	int mixed_burst = 0;
+	int atds = 0;
 
 	/* Some DMA parameters can be passed from the platform;
 	 * in case of these are not passed we keep a default
@@ -961,9 +1083,12 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 		burst_len = priv->plat->dma_cfg->burst_len;
 	}
 
+	if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
+		atds = 1;
+
 	return priv->hw->dma->init(priv->ioaddr, pbl, fixed_burst, mixed_burst,
 				   burst_len, priv->dma_tx_phy,
-				   priv->dma_rx_phy);
+				   priv->dma_rx_phy, atds);
 }
 
 /**
@@ -1237,7 +1362,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	csum_insertion = (skb->ip_summed == CHECKSUM_PARTIAL);
 
-	desc = priv->dma_tx + entry;
+	if (priv->extend_desc)
+		desc = (struct dma_desc *) (priv->dma_etx + entry);
+	else
+		desc = priv->dma_tx + entry;
+
 	first = desc;
 
 #ifdef STMMAC_XMIT_DEBUG
@@ -1268,14 +1397,17 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len,
 						csum_insertion, priv->mode);
 	} else
-		desc = priv->dma_tx + entry;
+		desc = first;
 
 	for (i = 0; i < nfrags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 		int len = skb_frag_size(frag);
 
 		entry = (++priv->cur_tx) % txsize;
-		desc = priv->dma_tx + entry;
+		if (priv->extend_desc)
+			desc = (struct dma_desc *) (priv->dma_etx + entry);
+		else
+			desc = priv->dma_tx + entry;
 
 		TX_DBG("\t[entry %d] segment len: %d\n", entry, len);
 		desc->des2 = skb_frag_dma_map(priv->device, frag, 0, len,
@@ -1319,7 +1451,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		       "first=%p, nfrags=%d\n",
 		       (priv->cur_tx % txsize), (priv->dirty_tx % txsize),
 		       entry, first, nfrags);
-		display_ring(priv->dma_tx, txsize);
+		if (priv->extend_desc)
+			stmmac_display_ring((void *)priv->dma_etx, txsize, 1);
+		else
+			stmmac_display_ring((void *)priv->dma_tx, txsize, 0);
+
 		pr_info(">>> frame to be transmitted: ");
 		print_pkt(skb->data, skb->len);
 	}
@@ -1344,10 +1480,16 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 {
 	unsigned int rxsize = priv->dma_rx_size;
 	int bfsize = priv->dma_buf_sz;
-	struct dma_desc *p = priv->dma_rx;
 
 	for (; priv->cur_rx - priv->dirty_rx > 0; priv->dirty_rx++) {
 		unsigned int entry = priv->dirty_rx % rxsize;
+		struct dma_desc *p;
+
+		if (priv->extend_desc)
+			p = (struct dma_desc *) (priv->dma_erx + entry);
+		else
+			p = priv->dma_rx + entry;
+
 		if (likely(priv->rx_skbuff[entry] == NULL)) {
 			struct sk_buff *skb;
 
@@ -1361,16 +1503,16 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 			    dma_map_single(priv->device, skb->data, bfsize,
 					   DMA_FROM_DEVICE);
 
-			(p + entry)->des2 = priv->rx_skbuff_dma[entry];
+			p->des2 = priv->rx_skbuff_dma[entry];
 
 			if (unlikely((priv->mode == STMMAC_RING_MODE) &&
 				     (priv->plat->has_gmac)))
-				priv->hw->ring->refill_desc3(bfsize, p + entry);
+				priv->hw->ring->refill_desc3(bfsize, p);
 
 			RX_DBG(KERN_INFO "\trefill entry #%d\n", entry);
 		}
 		wmb();
-		priv->hw->desc->set_rx_owner(p + entry);
+		priv->hw->desc->set_rx_owner(p);
 		wmb();
 	}
 }
@@ -1381,30 +1523,47 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 	unsigned int entry = priv->cur_rx % rxsize;
 	unsigned int next_entry;
 	unsigned int count = 0;
-	struct dma_desc *p = priv->dma_rx + entry;
-	struct dma_desc *p_next;
 
 #ifdef STMMAC_RX_DEBUG
 	if (netif_msg_hw(priv)) {
 		pr_debug(">>> stmmac_rx: descriptor ring:\n");
-		display_ring(priv->dma_rx, rxsize);
+		if (priv->extend_desc)
+			stmmac_display_ring((void *) priv->dma_erx, rxsize, 1);
+		else
+			stmmac_display_ring((void *)priv->dma_rx, rxsize, 0);
 	}
 #endif
-	while (!priv->hw->desc->get_rx_owner(p)) {
+	while (count < limit) {
 		int status;
+		struct dma_desc *p, *p_next;
 
-		if (count >= limit)
+		if (priv->extend_desc)
+			p = (struct dma_desc *) (priv->dma_erx + entry);
+		else
+			p = priv->dma_rx + entry ;
+
+		if (priv->hw->desc->get_rx_owner(p))
 			break;
 
 		count++;
 
 		next_entry = (++priv->cur_rx) % rxsize;
-		p_next = priv->dma_rx + next_entry;
+		if (priv->extend_desc)
+			p_next = (struct dma_desc *) (priv->dma_erx +
+						      next_entry);
+		else
+			p_next = priv->dma_rx + next_entry;
+
 		prefetch(p_next);
 
 		/* read the status of the incoming frame */
-		status = (priv->hw->desc->rx_status(&priv->dev->stats,
-						    &priv->xstats, p));
+		status = priv->hw->desc->rx_status(&priv->dev->stats,
+						   &priv->xstats, p);
+		if ((priv->extend_desc) && (priv->hw->desc->rx_extended_status))
+			priv->hw->desc->rx_extended_status(&priv->dev->stats,
+							   &priv->xstats,
+							   priv->dma_erx +
+							   entry);
 		if (unlikely(status == discard_frame))
 			priv->dev->stats.rx_errors++;
 		else {
@@ -1459,7 +1618,6 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			priv->dev->stats.rx_bytes += frame_len;
 		}
 		entry = next_entry;
-		p = p_next;	/* use prefetched values */
 	}
 
 	stmmac_rx_refill(priv);
@@ -1697,40 +1855,51 @@ static struct dentry *stmmac_fs_dir;
 static struct dentry *stmmac_rings_status;
 static struct dentry *stmmac_dma_cap;
 
-static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v)
+static void sysfs_display_ring(void *head, int size, int extend_desc,
+				struct seq_file *seq)
 {
-	struct tmp_s {
-		u64 a;
-		unsigned int b;
-		unsigned int c;
-	};
 	int i;
-	struct net_device *dev = seq->private;
-	struct stmmac_priv *priv = netdev_priv(dev);
-
-	seq_printf(seq, "=======================\n");
-	seq_printf(seq, " RX descriptor ring\n");
-	seq_printf(seq, "=======================\n");
+	struct dma_extended_desc *ep = (struct dma_extended_desc *) head;
+	struct dma_desc *p = (struct dma_desc *) head;
 
-	for (i = 0; i < priv->dma_rx_size; i++) {
-		struct tmp_s *x = (struct tmp_s *)(priv->dma_rx + i);
-		seq_printf(seq, "[%d] DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x",
-			   i, (unsigned int)(x->a),
-			   (unsigned int)((x->a) >> 32), x->b, x->c);
+	for (i = 0; i < size; i++) {
+		u64 x;
+		if (extend_desc) {
+			x = *(u64 *) ep;
+			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
+				   i, (unsigned int) virt_to_phys(ep),
+				   (unsigned int) x, (unsigned int) (x >> 32),
+				   ep->basic.des2, ep->basic.des3);
+			ep++;
+		} else {
+			x = *(u64 *) p;
+			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
+				   i, (unsigned int) virt_to_phys(ep),
+				   (unsigned int) x, (unsigned int) (x >> 32),
+				   p->des2, p->des3);
+			p++;
+		}
 		seq_printf(seq, "\n");
 	}
+}
 
-	seq_printf(seq, "\n");
-	seq_printf(seq, "=======================\n");
-	seq_printf(seq, "  TX descriptor ring\n");
-	seq_printf(seq, "=======================\n");
+static int stmmac_sysfs_ring_read(struct seq_file *seq, void *v)
+{
+	struct net_device *dev = seq->private;
+	struct stmmac_priv *priv = netdev_priv(dev);
+	unsigned int txsize = priv->dma_tx_size;
+	unsigned int rxsize = priv->dma_rx_size;
 
-	for (i = 0; i < priv->dma_tx_size; i++) {
-		struct tmp_s *x = (struct tmp_s *)(priv->dma_tx + i);
-		seq_printf(seq, "[%d] DES0=0x%x DES1=0x%x BUF1=0x%x BUF2=0x%x",
-			   i, (unsigned int)(x->a),
-			   (unsigned int)((x->a) >> 32), x->b, x->c);
-		seq_printf(seq, "\n");
+	if (priv->extend_desc) {
+		seq_printf(seq, "Extended RX descriptor ring:\n");
+		sysfs_display_ring((void *) priv->dma_erx, rxsize, 1, seq);
+		seq_printf(seq, "Extended TX descriptor ring:\n");
+		sysfs_display_ring((void *) priv->dma_etx, txsize, 1, seq);
+	} else {
+		seq_printf(seq, "RX descriptor ring:\n");
+		sysfs_display_ring((void *)priv->dma_rx, rxsize, 0, seq);
+		seq_printf(seq, "TX descriptor ring:\n");
+		sysfs_display_ring((void *)priv->dma_tx, txsize, 0, seq);
 	}
 
 	return 0;
@@ -1895,7 +2064,7 @@ static const struct net_device_ops stmmac_netdev_ops = {
  */
 static int stmmac_hw_init(struct stmmac_priv *priv)
 {
-	int ret = 0;
+	int ret;
 	struct mac_device_info *mac;
 
 	/* Identify the MAC HW device */
@@ -1913,6 +2082,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	/* Get and dump the chip ID */
 	priv->synopsys_id = stmmac_get_synopsys_id(priv);
 
+	/* To use alternate (extended) or normal descriptor structures */
+	stmmac_selec_desc_mode(priv);
+
 	/* To use the chained or ring mode */
 	if (chain_mode)	{
 		priv->hw->chain = &chain_mode_ops;
@@ -1947,9 +2119,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	} else
 		pr_info(" No HW DMA feature register supported");
 
-	/* Select the enhnaced/normal descriptor structures */
-	stmmac_selec_desc_mode(priv);
-
 	/* Enable the IPC (Checksum Offload) and check if the feature has been
 	 * enabled during the core configuration. */
 	ret = priv->hw->mac->rx_ipc(priv->ioaddr);
@@ -1969,7 +2138,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 		device_set_wakeup_capable(priv->device, 1);
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -2015,7 +2184,9 @@ struct stmmac_priv *stmmac_dvr_probe(struct device *device,
 		priv->plat->phy_addr = phyaddr;
 
 	/* Init MAC and get the capabilities */
-	stmmac_hw_init(priv);
+	ret = stmmac_hw_init(priv);
+	if (ret)
+		goto error_free_netdev;
 
 	ndev->netdev_ops = &stmmac_netdev_ops;
 
@@ -2086,6 +2257,7 @@ error_clk_get:
 	unregister_netdev(ndev);
 error_netdev_register:
 	netif_napi_del(&priv->napi);
+error_free_netdev:
 	free_netdev(ndev);
 
 	return NULL;
@@ -2119,7 +2291,6 @@ int stmmac_dvr_remove(struct net_device *ndev)
 int stmmac_suspend(struct net_device *ndev)
 {
 	struct stmmac_priv *priv = netdev_priv(ndev);
-	int dis_ic = 0;
 	unsigned long flags;
 
 	if (!ndev || !netif_running(ndev))
@@ -2133,19 +2304,13 @@ int stmmac_suspend(struct net_device *ndev)
 	netif_device_detach(ndev);
 	netif_stop_queue(ndev);
 
-	if (priv->use_riwt)
-		dis_ic = 1;
-
 	napi_disable(&priv->napi);
 
 	/* Stop TX/RX DMA */
 	priv->hw->dma->stop_tx(priv->ioaddr);
 	priv->hw->dma->stop_rx(priv->ioaddr);
-	/* Clear the Rx/Tx descriptors */
-	priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size,
-				     dis_ic, priv->mode);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size,
-				     priv->mode);
+
+	stmmac_clear_descriptors(priv);
 
 	/* Enable Power down mode by programming the PMT regs */
 	if (device_may_wakeup(priv->device))
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 1/8] stmmac: reorganize chain/ring modes removing Koptions
From: Giuseppe CAVALLARO @ 2013-03-26 14:43 UTC (permalink / raw)
  To: netdev; +Cc: rayagond, richardcochran, Giuseppe Cavallaro
In-Reply-To: <1364308992-27929-1-git-send-email-peppe.cavallaro@st.com>

Previously we had two Kconfig options to decide whether the stmmac
had to use either a ring or a chain to manage its descriptors.
This patch removes the kernel configuration options and allows us
to use the chain mode by passing a module option.
Ring mode continues to be the default.

Also, with this patch it will be easier to validate the driver build and
guarantee that both modes always compile fine.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig       |   18 ----
 drivers/net/ethernet/stmicro/stmmac/Makefile      |    6 +-
 drivers/net/ethernet/stmicro/stmmac/chain_mode.c  |   36 ++-------
 drivers/net/ethernet/stmicro/stmmac/common.h      |   25 ++++--
 drivers/net/ethernet/stmicro/stmmac/descs_com.h   |   44 +++++-----
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c    |   29 +++++--
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c   |   29 +++++--
 drivers/net/ethernet/stmicro/stmmac/ring_mode.c   |   24 ++----
 drivers/net/ethernet/stmicro/stmmac/stmmac.h      |    1 +
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   94 ++++++++++++++------
 10 files changed, 169 insertions(+), 137 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index c0ea838..f0720d0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -54,22 +54,4 @@ config STMMAC_DA
 	  By default, the DMA arbitration scheme is based on Round-robin
 	  (rx:tx priority is 1:1).
 
-choice
-	prompt "Select the DMA TX/RX descriptor operating modes"
-	depends on STMMAC_ETH
-	---help---
-	  This driver supports DMA descriptor to operate both in dual buffer
-	  (RING) and linked-list(CHAINED) mode. In RING mode each descriptor
-	  points to two data buffer pointers whereas in CHAINED mode they
-	  points to only one data buffer pointer.
-
-config STMMAC_RING
-	bool "Enable Descriptor Ring Mode"
-
-config STMMAC_CHAINED
-	bool "Enable Descriptor Chained Mode"
-
-endchoice
-
-
 endif
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index c8e8ea6..ae995a3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -1,9 +1,7 @@
 obj-$(CONFIG_STMMAC_ETH) += stmmac.o
-stmmac-$(CONFIG_STMMAC_RING) += ring_mode.o
-stmmac-$(CONFIG_STMMAC_CHAINED) += chain_mode.o
 stmmac-$(CONFIG_STMMAC_PLATFORM) += stmmac_platform.o
 stmmac-$(CONFIG_STMMAC_PCI) += stmmac_pci.o
-stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o	\
-	      dwmac_lib.o dwmac1000_core.o  dwmac1000_dma.o	\
+stmmac-objs:= stmmac_main.o stmmac_ethtool.o stmmac_mdio.o ring_mode.o	\
+	      chain_mode.o dwmac_lib.o dwmac1000_core.o  dwmac1000_dma.o \
 	      dwmac100_core.o dwmac100_dma.o enh_desc.o  norm_desc.o \
 	      mmc_core.o $(stmmac-y)
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index 0668659..08ff51e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -28,7 +28,7 @@
 
 #include "stmmac.h"
 
-unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
+static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 {
 	struct stmmac_priv *priv = (struct stmmac_priv *) p;
 	unsigned int txsize = priv->dma_tx_size;
@@ -47,7 +47,7 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
 	desc->des2 = dma_map_single(priv->device, skb->data,
 				    bmax, DMA_TO_DEVICE);
-	priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum);
+	priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE);
 
 	while (len != 0) {
 		entry = (++priv->cur_tx) % txsize;
@@ -57,8 +57,8 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 			desc->des2 = dma_map_single(priv->device,
 						    (skb->data + bmax * i),
 						    bmax, DMA_TO_DEVICE);
-			priv->hw->desc->prepare_tx_desc(desc, 0, bmax,
-							csum);
+			priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
+							STMMAC_CHAIN_MODE);
 			priv->hw->desc->set_tx_owner(desc);
 			priv->tx_skbuff[entry] = NULL;
 			len -= bmax;
@@ -67,8 +67,8 @@ unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 			desc->des2 = dma_map_single(priv->device,
 						    (skb->data + bmax * i), len,
 						    DMA_TO_DEVICE);
-			priv->hw->desc->prepare_tx_desc(desc, 0, len,
-							csum);
+			priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
+							STMMAC_CHAIN_MODE);
 			priv->hw->desc->set_tx_owner(desc);
 			priv->tx_skbuff[entry] = NULL;
 			len = 0;
@@ -89,18 +89,6 @@ static unsigned int stmmac_is_jumbo_frm(int len, int enh_desc)
 	return ret;
 }
 
-static void stmmac_refill_desc3(int bfsize, struct dma_desc *p)
-{
-}
-
-static void stmmac_init_desc3(int des3_as_data_buf, struct dma_desc *p)
-{
-}
-
-static void stmmac_clean_desc3(struct dma_desc *p)
-{
-}
-
 static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr,
 				  unsigned int size)
 {
@@ -120,18 +108,8 @@ static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr,
 	p->des3 = (unsigned int)phy_addr;
 }
 
-static int stmmac_set_16kib_bfsize(int mtu)
-{
-	/* Not supported */
-	return 0;
-}
-
-const struct stmmac_ring_mode_ops ring_mode_ops = {
+const struct stmmac_chain_mode_ops chain_mode_ops = {
 	.is_jumbo_frm = stmmac_is_jumbo_frm,
 	.jumbo_frm = stmmac_jumbo_frm,
-	.refill_desc3 = stmmac_refill_desc3,
-	.init_desc3 = stmmac_init_desc3,
 	.init_dma_chain = stmmac_init_dma_chain,
-	.clean_desc3 = stmmac_clean_desc3,
-	.set_16kib_bfsize = stmmac_set_16kib_bfsize,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 186d148..a295532 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -255,23 +255,27 @@ struct dma_features {
 #define STMMAC_DEFAULT_LIT_LS_TIMER	0x3E8
 #define STMMAC_DEFAULT_TWT_LS_TIMER	0x0
 
+#define STMMAC_CHAIN_MODE	0x1
+#define STMMAC_RING_MODE	0x2
+
 struct stmmac_desc_ops {
 	/* DMA RX descriptor ring initialization */
 	void (*init_rx_desc) (struct dma_desc *p, unsigned int ring_size,
-			      int disable_rx_ic);
+			      int disable_rx_ic, int mode);
 	/* DMA TX descriptor ring initialization */
-	void (*init_tx_desc) (struct dma_desc *p, unsigned int ring_size);
+	void (*init_tx_desc) (struct dma_desc *p, unsigned int ring_size,
+			      int mode);
 
 	/* Invoked by the xmit function to prepare the tx descriptor */
 	void (*prepare_tx_desc) (struct dma_desc *p, int is_fs, int len,
-				 int csum_flag);
+				 int csum_flag, int mode);
 	/* Set/get the owner of the descriptor */
 	void (*set_tx_owner) (struct dma_desc *p);
 	int (*get_tx_owner) (struct dma_desc *p);
 	/* Invoked by the xmit function to close the tx descriptor */
 	void (*close_tx_desc) (struct dma_desc *p);
 	/* Clean the tx descriptor as soon as the tx irq is received */
-	void (*release_tx_desc) (struct dma_desc *p);
+	void (*release_tx_desc) (struct dma_desc *p, int mode);
 	/* Clear interrupt on tx frame completion. When this bit is
 	 * set an interrupt happens as soon as the frame is transmitted */
 	void (*clear_tx_ic) (struct dma_desc *p);
@@ -361,18 +365,24 @@ struct stmmac_ring_mode_ops {
 	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
 	unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum);
 	void (*refill_desc3) (int bfsize, struct dma_desc *p);
-	void (*init_desc3) (int des3_as_data_buf, struct dma_desc *p);
-	void (*init_dma_chain) (struct dma_desc *des, dma_addr_t phy_addr,
-				unsigned int size);
+	void (*init_desc3) (struct dma_desc *p);
 	void (*clean_desc3) (struct dma_desc *p);
 	int (*set_16kib_bfsize) (int mtu);
 };
 
+struct stmmac_chain_mode_ops {
+	unsigned int (*is_jumbo_frm) (int len, int ehn_desc);
+	unsigned int (*jumbo_frm) (void *priv, struct sk_buff *skb, int csum);
+	void (*init_dma_chain) (struct dma_desc *des, dma_addr_t phy_addr,
+				unsigned int size);
+};
+
 struct mac_device_info {
 	const struct stmmac_ops		*mac;
 	const struct stmmac_desc_ops	*desc;
 	const struct stmmac_dma_ops	*dma;
 	const struct stmmac_ring_mode_ops	*ring;
+	const struct stmmac_chain_mode_ops	*chain;
 	struct mii_regs mii;	/* MII register Addresses */
 	struct mac_link link;
 	unsigned int synopsys_uid;
@@ -390,5 +400,6 @@ extern void stmmac_set_mac(void __iomem *ioaddr, bool enable);
 
 extern void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr);
 extern const struct stmmac_ring_mode_ops ring_mode_ops;
+extern const struct stmmac_chain_mode_ops chain_mode_ops;
 
 #endif /* __COMMON_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index 7ee9499..20f83fc 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -30,26 +30,28 @@
 #ifndef __DESC_COM_H__
 #define __DESC_COM_H__
 
-#if defined(CONFIG_STMMAC_RING)
-static inline void ehn_desc_rx_set_on_ring_chain(struct dma_desc *p, int end)
+/* Specific functions used for Ring mode */
+
+/* Enhanced descriptors */
+static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
 {
 	p->des01.erx.buffer2_size = BUF_SIZE_8KiB - 1;
 	if (end)
 		p->des01.erx.end_ring = 1;
 }
 
-static inline void ehn_desc_tx_set_on_ring_chain(struct dma_desc *p, int end)
+static inline void ehn_desc_tx_set_on_ring(struct dma_desc *p, int end)
 {
 	if (end)
 		p->des01.etx.end_ring = 1;
 }
 
-static inline void enh_desc_end_tx_desc(struct dma_desc *p, int ter)
+static inline void enh_desc_end_tx_desc_on_ring(struct dma_desc *p, int ter)
 {
 	p->des01.etx.end_ring = ter;
 }
 
-static inline void enh_set_tx_desc_len(struct dma_desc *p, int len)
+static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
 	if (unlikely(len > BUF_SIZE_4KiB)) {
 		p->des01.etx.buffer1_size = BUF_SIZE_4KiB;
@@ -58,25 +60,26 @@ static inline void enh_set_tx_desc_len(struct dma_desc *p, int len)
 		p->des01.etx.buffer1_size = len;
 }
 
-static inline void ndesc_rx_set_on_ring_chain(struct dma_desc *p, int end)
+/* Normal descriptors */
+static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end)
 {
 	p->des01.rx.buffer2_size = BUF_SIZE_2KiB - 1;
 	if (end)
 		p->des01.rx.end_ring = 1;
 }
 
-static inline void ndesc_tx_set_on_ring_chain(struct dma_desc *p, int end)
+static inline void ndesc_tx_set_on_ring(struct dma_desc *p, int end)
 {
 	if (end)
 		p->des01.tx.end_ring = 1;
 }
 
-static inline void ndesc_end_tx_desc(struct dma_desc *p, int ter)
+static inline void ndesc_end_tx_desc_on_ring(struct dma_desc *p, int ter)
 {
 	p->des01.tx.end_ring = ter;
 }
 
-static inline void norm_set_tx_desc_len(struct dma_desc *p, int len)
+static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
 	if (unlikely(len > BUF_SIZE_2KiB)) {
 		p->des01.etx.buffer1_size = BUF_SIZE_2KiB - 1;
@@ -85,47 +88,48 @@ static inline void norm_set_tx_desc_len(struct dma_desc *p, int len)
 		p->des01.tx.buffer1_size = len;
 }
 
-#else
+/* Specific functions used for Chain mode */
 
-static inline void ehn_desc_rx_set_on_ring_chain(struct dma_desc *p, int end)
+/* Enhanced descriptors */
+static inline void ehn_desc_rx_set_on_chain(struct dma_desc *p, int end)
 {
 	p->des01.erx.second_address_chained = 1;
 }
 
-static inline void ehn_desc_tx_set_on_ring_chain(struct dma_desc *p, int end)
+static inline void ehn_desc_tx_set_on_chain(struct dma_desc *p, int end)
 {
 	p->des01.etx.second_address_chained = 1;
 }
 
-static inline void enh_desc_end_tx_desc(struct dma_desc *p, int ter)
+static inline void enh_desc_end_tx_desc_on_chain(struct dma_desc *p, int ter)
 {
 	p->des01.etx.second_address_chained = 1;
 }
 
-static inline void enh_set_tx_desc_len(struct dma_desc *p, int len)
+static inline void enh_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
 	p->des01.etx.buffer1_size = len;
 }
 
-static inline void ndesc_rx_set_on_ring_chain(struct dma_desc *p, int end)
+/* Normal descriptors */
+static inline void ndesc_rx_set_on_chain(struct dma_desc *p, int end)
 {
 	p->des01.rx.second_address_chained = 1;
 }
 
-static inline void ndesc_tx_set_on_ring_chain(struct dma_desc *p, int ring_size)
+static inline void ndesc_tx_set_on_chain(struct dma_desc *p, int
+						 ring_size)
 {
 	p->des01.tx.second_address_chained = 1;
 }
 
-static inline void ndesc_end_tx_desc(struct dma_desc *p, int ter)
+static inline void ndesc_end_tx_desc_on_chain(struct dma_desc *p, int ter)
 {
 	p->des01.tx.second_address_chained = 1;
 }
 
-static inline void norm_set_tx_desc_len(struct dma_desc *p, int len)
+static inline void norm_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
 	p->des01.tx.buffer1_size = len;
 }
-#endif
-
 #endif /* __DESC_COM_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index 2fc8ef9..62f9f4e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -229,14 +229,17 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 }
 
 static void enh_desc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
-				  int disable_rx_ic)
+				  int disable_rx_ic, int mode)
 {
 	int i;
 	for (i = 0; i < ring_size; i++) {
 		p->des01.erx.own = 1;
 		p->des01.erx.buffer1_size = BUF_SIZE_8KiB - 1;
 
-		ehn_desc_rx_set_on_ring_chain(p, (i == ring_size - 1));
+		if (mode == STMMAC_CHAIN_MODE)
+			ehn_desc_rx_set_on_chain(p, (i == ring_size - 1));
+		else
+			ehn_desc_rx_set_on_ring(p, (i == ring_size - 1));
 
 		if (disable_rx_ic)
 			p->des01.erx.disable_ic = 1;
@@ -244,13 +247,17 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
 	}
 }
 
-static void enh_desc_init_tx_desc(struct dma_desc *p, unsigned int ring_size)
+static void enh_desc_init_tx_desc(struct dma_desc *p, unsigned int ring_size,
+				  int mode)
 {
 	int i;
 
 	for (i = 0; i < ring_size; i++) {
 		p->des01.etx.own = 0;
-		ehn_desc_tx_set_on_ring_chain(p, (i == ring_size - 1));
+		if (mode == STMMAC_CHAIN_MODE)
+			ehn_desc_tx_set_on_chain(p, (i == ring_size - 1));
+		else
+			ehn_desc_tx_set_on_ring(p, (i == ring_size - 1));
 		p++;
 	}
 }
@@ -280,20 +287,26 @@ static int enh_desc_get_tx_ls(struct dma_desc *p)
 	return p->des01.etx.last_segment;
 }
 
-static void enh_desc_release_tx_desc(struct dma_desc *p)
+static void enh_desc_release_tx_desc(struct dma_desc *p, int mode)
 {
 	int ter = p->des01.etx.end_ring;
 
 	memset(p, 0, offsetof(struct dma_desc, des2));
-	enh_desc_end_tx_desc(p, ter);
+	if (mode == STMMAC_CHAIN_MODE)
+		enh_desc_end_tx_desc_on_chain(p, ter);
+	else
+		enh_desc_end_tx_desc_on_ring(p, ter);
 }
 
 static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
-				     int csum_flag)
+				     int csum_flag, int mode)
 {
 	p->des01.etx.first_segment = is_fs;
 
-	enh_set_tx_desc_len(p, len);
+	if (mode == STMMAC_CHAIN_MODE)
+		enh_set_tx_desc_len_on_chain(p, len);
+	else
+		enh_set_tx_desc_len_on_ring(p, len);
 
 	if (likely(csum_flag))
 		p->des01.etx.checksum_insertion = cic_full;
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 68962c5..88df0b4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -123,14 +123,17 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 }
 
 static void ndesc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
-			       int disable_rx_ic)
+			       int disable_rx_ic, int mode)
 {
 	int i;
 	for (i = 0; i < ring_size; i++) {
 		p->des01.rx.own = 1;
 		p->des01.rx.buffer1_size = BUF_SIZE_2KiB - 1;
 
-		ndesc_rx_set_on_ring_chain(p, (i == ring_size - 1));
+		if (mode == STMMAC_CHAIN_MODE)
+			ndesc_rx_set_on_chain(p, (i == ring_size - 1));
+		else
+			ndesc_rx_set_on_ring(p, (i == ring_size - 1));
 
 		if (disable_rx_ic)
 			p->des01.rx.disable_ic = 1;
@@ -138,12 +141,16 @@ static void ndesc_init_rx_desc(struct dma_desc *p, unsigned int ring_size,
 	}
 }
 
-static void ndesc_init_tx_desc(struct dma_desc *p, unsigned int ring_size)
+static void ndesc_init_tx_desc(struct dma_desc *p, unsigned int ring_size,
+			       int mode)
 {
 	int i;
 	for (i = 0; i < ring_size; i++) {
 		p->des01.tx.own = 0;
-		ndesc_tx_set_on_ring_chain(p, (i == (ring_size - 1)));
+		if (mode == STMMAC_CHAIN_MODE)
+			ndesc_tx_set_on_chain(p, (i == (ring_size - 1)));
+		else
+			ndesc_tx_set_on_ring(p, (i == (ring_size - 1)));
 		p++;
 	}
 }
@@ -173,19 +180,25 @@ static int ndesc_get_tx_ls(struct dma_desc *p)
 	return p->des01.tx.last_segment;
 }
 
-static void ndesc_release_tx_desc(struct dma_desc *p)
+static void ndesc_release_tx_desc(struct dma_desc *p, int mode)
 {
 	int ter = p->des01.tx.end_ring;
 
 	memset(p, 0, offsetof(struct dma_desc, des2));
-	ndesc_end_tx_desc(p, ter);
+	if (mode == STMMAC_CHAIN_MODE)
+		ndesc_end_tx_desc_on_chain(p, ter);
+	else
+		ndesc_end_tx_desc_on_ring(p, ter);
 }
 
 static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
-				  int csum_flag)
+				  int csum_flag, int mode)
 {
 	p->des01.tx.first_segment = is_fs;
-	norm_set_tx_desc_len(p, len);
+	if (mode == STMMAC_CHAIN_MODE)
+		norm_set_tx_desc_len_on_chain(p, len);
+	else
+		norm_set_tx_desc_len_on_ring(p, len);
 
 	if (likely(csum_flag))
 		p->des01.tx.checksum_insertion = cic_full;
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 4b785e1..8a5e661 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -49,8 +49,8 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		desc->des2 = dma_map_single(priv->device, skb->data,
 					    bmax, DMA_TO_DEVICE);
 		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 1, bmax,
-						csum);
+		priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
+						STMMAC_RING_MODE);
 		wmb();
 		entry = (++priv->cur_tx) % txsize;
 		desc = priv->dma_tx + entry;
@@ -58,7 +58,8 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		desc->des2 = dma_map_single(priv->device, skb->data + bmax,
 					    len, DMA_TO_DEVICE);
 		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum);
+		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
+						STMMAC_RING_MODE);
 		wmb();
 		priv->hw->desc->set_tx_owner(desc);
 		priv->tx_skbuff[entry] = NULL;
@@ -66,7 +67,8 @@ static unsigned int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		desc->des2 = dma_map_single(priv->device, skb->data,
 					    nopaged_len, DMA_TO_DEVICE);
 		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
-		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum);
+		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
+						STMMAC_RING_MODE);
 	}
 
 	return entry;
@@ -89,17 +91,10 @@ static void stmmac_refill_desc3(int bfsize, struct dma_desc *p)
 		p->des3 = p->des2 + BUF_SIZE_8KiB;
 }
 
-/* In ring mode we need to fill the desc3 because it is used
- * as buffer */
-static void stmmac_init_desc3(int des3_as_data_buf, struct dma_desc *p)
-{
-	if (unlikely(des3_as_data_buf))
-		p->des3 = p->des2 + BUF_SIZE_8KiB;
-}
-
-static void stmmac_init_dma_chain(struct dma_desc *des, dma_addr_t phy_addr,
-				  unsigned int size)
+/* In ring mode we need to fill the desc3 because it is used as buffer */
+static void stmmac_init_desc3(struct dma_desc *p)
 {
+	p->des3 = p->des2 + BUF_SIZE_8KiB;
 }
 
 static void stmmac_clean_desc3(struct dma_desc *p)
@@ -121,7 +116,6 @@ const struct stmmac_ring_mode_ops ring_mode_ops = {
 	.jumbo_frm = stmmac_jumbo_frm,
 	.refill_desc3 = stmmac_refill_desc3,
 	.init_desc3 = stmmac_init_desc3,
-	.init_dma_chain = stmmac_init_dma_chain,
 	.clean_desc3 = stmmac_clean_desc3,
 	.set_16kib_bfsize = stmmac_set_16kib_bfsize,
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index b05df89..e5f2f33 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -93,6 +93,7 @@ struct stmmac_priv {
 	u32 tx_coal_timer;
 	int use_riwt;
 	u32 rx_riwt;
+	unsigned int mode;
 };
 
 extern int phyaddr;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index d02b446..bbee6b3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -130,6 +130,13 @@ module_param(eee_timer, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_TIMER(x) (jiffies + msecs_to_jiffies(x))
 
+/* By default the driver will use the ring mode to manage tx and rx descriptors
+ * but passing this value so user can force to use the chain instead of the ring
+ */
+static unsigned int chain_mode;
+module_param(chain_mode, int, S_IRUGO);
+MODULE_PARM_DESC(chain_mode, "To use chain instead of ring mode");
+
 static irqreturn_t stmmac_interrupt(int irq, void *dev_id);
 
 #ifdef CONFIG_STMMAC_DEBUG_FS
@@ -514,17 +521,15 @@ static void init_dma_desc_rings(struct net_device *dev)
 	struct sk_buff *skb;
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int rxsize = priv->dma_rx_size;
-	unsigned int bfsize;
+	unsigned int bfsize = 0;
 	int dis_ic = 0;
-	int des3_as_data_buf = 0;
 
 	/* Set the max buffer size according to the DESC mode
 	 * and the MTU. Note that RING mode allows 16KiB bsize. */
-	bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu);
+	if (priv->mode == STMMAC_RING_MODE)
+		bfsize = priv->hw->ring->set_16kib_bfsize(dev->mtu);
 
-	if (bfsize == BUF_SIZE_16KiB)
-		des3_as_data_buf = 1;
-	else
+	if (bfsize < BUF_SIZE_16KiB)
 		bfsize = stmmac_set_bfsize(dev->mtu, priv->dma_buf_sz);
 
 	DBG(probe, INFO, "stmmac: txsize %d, rxsize %d, bfsize %d\n",
@@ -571,7 +576,9 @@ static void init_dma_desc_rings(struct net_device *dev)
 
 		p->des2 = priv->rx_skbuff_dma[i];
 
-		priv->hw->ring->init_desc3(des3_as_data_buf, p);
+		if ((priv->mode == STMMAC_RING_MODE) &&
+		    (bfsize == BUF_SIZE_16KiB))
+			priv->hw->ring->init_desc3(p);
 
 		DBG(probe, INFO, "[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i],
 			priv->rx_skbuff[i]->data, priv->rx_skbuff_dma[i]);
@@ -589,17 +596,20 @@ static void init_dma_desc_rings(struct net_device *dev)
 
 	/* In case of Chained mode this sets the des3 to the next
 	 * element in the chain */
-	priv->hw->ring->init_dma_chain(priv->dma_rx, priv->dma_rx_phy, rxsize);
-	priv->hw->ring->init_dma_chain(priv->dma_tx, priv->dma_tx_phy, txsize);
-
+	if (priv->mode == STMMAC_CHAIN_MODE) {
+		priv->hw->chain->init_dma_chain(priv->dma_rx, priv->dma_rx_phy,
+						rxsize);
+		priv->hw->chain->init_dma_chain(priv->dma_tx, priv->dma_tx_phy,
+						txsize);
+	}
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 
 	if (priv->use_riwt)
 		dis_ic = 1;
 	/* Clear the Rx/Tx descriptors */
-	priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, txsize);
+	priv->hw->desc->init_rx_desc(priv->dma_rx, rxsize, dis_ic, priv->mode);
+	priv->hw->desc->init_tx_desc(priv->dma_tx, txsize, priv->mode);
 
 	if (netif_msg_hw(priv)) {
 		pr_info("RX descriptor ring:\n");
@@ -726,14 +736,15 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 			dma_unmap_single(priv->device, p->des2,
 					 priv->hw->desc->get_tx_len(p),
 					 DMA_TO_DEVICE);
-		priv->hw->ring->clean_desc3(p);
+		if (priv->mode == STMMAC_RING_MODE)
+			priv->hw->ring->clean_desc3(p);
 
 		if (likely(skb != NULL)) {
 			dev_kfree_skb(skb);
 			priv->tx_skbuff[entry] = NULL;
 		}
 
-		priv->hw->desc->release_tx_desc(p);
+		priv->hw->desc->release_tx_desc(p, priv->mode);
 
 		priv->dirty_tx++;
 	}
@@ -778,7 +789,8 @@ static void stmmac_tx_err(struct stmmac_priv *priv)
 
 	priv->hw->dma->stop_tx(priv->ioaddr);
 	dma_free_tx_skbufs(priv);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size);
+	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size,
+				     priv->mode);
 	priv->dirty_tx = 0;
 	priv->cur_tx = 0;
 	priv->hw->dma->start_tx(priv->ioaddr);
@@ -1190,7 +1202,7 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct stmmac_priv *priv = netdev_priv(dev);
 	unsigned int txsize = priv->dma_tx_size;
 	unsigned int entry;
-	int i, csum_insertion = 0;
+	int i, csum_insertion = 0, is_jumbo = 0;
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	struct dma_desc *desc, *first;
 	unsigned int nopaged_len = skb_headlen(skb);
@@ -1236,15 +1248,27 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 #endif
 	priv->tx_skbuff[entry] = skb;
 
-	if (priv->hw->ring->is_jumbo_frm(skb->len, priv->plat->enh_desc)) {
-		entry = priv->hw->ring->jumbo_frm(priv, skb, csum_insertion);
-		desc = priv->dma_tx + entry;
+	/* To program the descriptors according to the size of the frame */
+	if (priv->mode == STMMAC_RING_MODE) {
+		is_jumbo = priv->hw->ring->is_jumbo_frm(skb->len,
+							priv->plat->enh_desc);
+		if (unlikely(is_jumbo))
+			entry = priv->hw->ring->jumbo_frm(priv, skb,
+							  csum_insertion);
 	} else {
+		is_jumbo = priv->hw->chain->is_jumbo_frm(skb->len,
+							priv->plat->enh_desc);
+		if (unlikely(is_jumbo))
+			entry = priv->hw->chain->jumbo_frm(priv, skb,
+							   csum_insertion);
+	}
+	if (likely(!is_jumbo)) {
 		desc->des2 = dma_map_single(priv->device, skb->data,
 					nopaged_len, DMA_TO_DEVICE);
 		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len,
-						csum_insertion);
-	}
+						csum_insertion, priv->mode);
+	} else
+		desc = priv->dma_tx + entry;
 
 	for (i = 0; i < nfrags; i++) {
 		const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
@@ -1257,7 +1281,8 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		desc->des2 = skb_frag_dma_map(priv->device, frag, 0, len,
 					      DMA_TO_DEVICE);
 		priv->tx_skbuff[entry] = NULL;
-		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion);
+		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum_insertion,
+						priv->mode);
 		wmb();
 		priv->hw->desc->set_tx_owner(desc);
 		wmb();
@@ -1338,7 +1363,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 
 			(p + entry)->des2 = priv->rx_skbuff_dma[entry];
 
-			if (unlikely(priv->plat->has_gmac))
+			if (unlikely((priv->mode == STMMAC_RING_MODE) &&
+				     (priv->plat->has_gmac)))
 				priv->hw->ring->refill_desc3(bfsize, p + entry);
 
 			RX_DBG(KERN_INFO "\trefill entry #%d\n", entry);
@@ -1884,12 +1910,20 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 	priv->hw = mac;
 
-	/* To use the chained or ring mode */
-	priv->hw->ring = &ring_mode_ops;
-
 	/* Get and dump the chip ID */
 	priv->synopsys_id = stmmac_get_synopsys_id(priv);
 
+	/* To use the chained or ring mode */
+	if (chain_mode)	{
+		priv->hw->chain = &chain_mode_ops;
+		pr_info(" Chain mode enabled\n");
+		priv->mode = STMMAC_CHAIN_MODE;
+	} else {
+		priv->hw->ring = &ring_mode_ops;
+		pr_info(" Ring mode enabled\n");
+		priv->mode = STMMAC_RING_MODE;
+	}
+
 	/* Get the HW capability (new GMAC newer than 3.50a) */
 	priv->hw_cap_support = stmmac_get_hw_features(priv);
 	if (priv->hw_cap_support) {
@@ -2109,8 +2143,9 @@ int stmmac_suspend(struct net_device *ndev)
 	priv->hw->dma->stop_rx(priv->ioaddr);
 	/* Clear the Rx/Tx descriptors */
 	priv->hw->desc->init_rx_desc(priv->dma_rx, priv->dma_rx_size,
-				     dis_ic);
-	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size);
+				     dis_ic, priv->mode);
+	priv->hw->desc->init_tx_desc(priv->dma_tx, priv->dma_tx_size,
+				     priv->mode);
 
 	/* Enable Power down mode by programming the PMT regs */
 	if (device_may_wakeup(priv->device))
@@ -2249,6 +2284,9 @@ static int __init stmmac_cmdline_opt(char *str)
 		} else if (!strncmp(opt, "eee_timer:", 10)) {
 			if (kstrtoint(opt + 10, 0, &eee_timer))
 				goto err;
+		} else if (!strncmp(opt, "chain_mode:", 11)) {
+			if (kstrtoint(opt + 11, 0, &chain_mode))
+				goto err;
 		}
 	}
 	return 0;
-- 
1.7.4.4

^ permalink raw reply related

* [net-next.git 0/8 (v2)] stmmac: update to March_2013 (ext desc, PTP, SGMII)
From: Giuseppe CAVALLARO @ 2013-03-26 14:43 UTC (permalink / raw)
  To: netdev; +Cc: rayagond, richardcochran, Giuseppe Cavallaro

These patches enhance the driver adding the PTP support and the initial code
for RGMII/SGMII/TBI/RTBI modes.
Also, these patches review the driver, removing some Koptions for selecting between
chain and ring modes. This is really useful to validate the driver also at build time.
Before adding PTP, the extended descriptor support has been added because it
is mandatory to save HW timestamp in new dedicated descriptors. Also in this
case no Koption added.

Concerning the PTP, I have hacked/reviewed and tested many
part of these patches also verifying the back compatibility on
several HW and chips.

Concerning the SGMII/RGMII we have already discussed about the support
in the netdev mailing list with Byungho where these patches were partially
analysed.
So I have only ported them against the latest net-next (and on
top of PTP). I have added some missing things: e.g. some parts of the
ethtool for ANE. As we clarified with Byungho, we will add further
enhancements on top of these patches if needed.

I have also built all against ARM/SH/X68 platforms and no issues on
ST-Boxes.

Thx goes to Rayagond that wrote and tested the PTP and to Byungho for SGMII.

V2: This Version 2 has the fixes discussed in the ML, for example:
    o completely remove the Koption... all the decisions are made at probe time
    o review the PTP patches and better organize them just in two patches
    o added all the fixes provided by Richard on PTP and CLK driver.

Giuseppe Cavallaro (5):
  stmmac: reorganize chain/ring modes removing Koptions
  stmmac: support extend descriptors
  stmmac: start adding pcs and rgmii core irq
  stmmac: initial support to manage pcs modes
  stmmac: update the Doc and Version (PTP+SGMII)

Rayagond Kokatanur (3):
  stmmac: add tx_skbuff_dma to save descriptors used by PTP
  stmmac: add IEEE PTPv1 and PTPv2 support.
  stmmac: add the support for PTP hw clock driver

 Documentation/networking/stmmac.txt                |   33 +-
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |   19 +-
 drivers/net/ethernet/stmicro/stmmac/Makefile       |    8 +-
 drivers/net/ethernet/stmicro/stmmac/chain_mode.c   |   90 ++-
 drivers/net/ethernet/stmicro/stmmac/common.h       |  122 ++-
 drivers/net/ethernet/stmicro/stmmac/descs.h        |   51 +-
 drivers/net/ethernet/stmicro/stmmac/descs_com.h    |   44 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac1000.h    |   40 +-
 .../net/ethernet/stmicro/stmmac/dwmac1000_core.c   |  104 ++-
 .../net/ethernet/stmicro/stmmac/dwmac1000_dma.c    |    8 +-
 .../net/ethernet/stmicro/stmmac/dwmac100_core.c    |    3 +-
 drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c |    4 +-
 drivers/net/ethernet/stmicro/stmmac/enh_desc.c     |  151 +++-
 drivers/net/ethernet/stmicro/stmmac/norm_desc.c    |   85 ++-
 drivers/net/ethernet/stmicro/stmmac/ring_mode.c    |   38 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |   23 +-
 .../net/ethernet/stmicro/stmmac/stmmac_ethtool.c   |  156 +++-
 .../net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c  |  148 +++
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  994 ++++++++++++++++----
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c   |  215 +++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h   |   74 ++
 21 files changed, 2019 insertions(+), 391 deletions(-)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.h

-- 
1.7.4.4

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Serge Hallyn @ 2013-03-26 14:33 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Benoit Lourdelet, Stephen Hemminger, netdev@vger.kernel.org
In-Reply-To: <87wqsu72r6.fsf@xmission.com>

Actually, lxc is using random names now, so it's ok.

Benoit, can you use the patches from Eric with lxc (or use the script
you were using before but specify names as he said)?

-serge

^ permalink raw reply

* defxx: skb_push() failing?
From: David Oostdyk @ 2013-03-26 14:29 UTC (permalink / raw)
  To: netdev; +Cc: macro

Hello,

In dfx_xmt_queue_pkt() in defxx.c, there is a skb_push(3) call which 
makes room for 3 packet request header bytes.  There is some discussion 
in the driver explaining why those three bytes will be available.  I 
have an old FDDI card that I'm trying to bring up:

05:05.0 FDDI network controller: Digital Equipment Corporation 
PCI-to-PDQ Interface Chip [PFI] (rev 02)

Most skbuffs that come through dfx_xmt_queue_pkt() have 11 bytes 
between skb->head and skb->data.  On the other hand, at almost exactly 
60-second intervals, an skb arrives that has zero bytes between 
skb->head and skb->data.  This normally causes a kernel panic, and for 
the time being I just skip over such skbs.

Does anyone have advice on where I should start digging to find the 
cause of this?

Thanks in advance!
- David Oostdyk

^ permalink raw reply

* Re: [PATCH] libertas: drop maintainership
From: Dan Williams @ 2013-03-26 14:22 UTC (permalink / raw)
  To: John W. Linville
  Cc: Joe Perches, netdev, linux-wireless, Daniel Drake, Bing Zhao
In-Reply-To: <20130325185942.GC17454@tuxdriver.com>

On Mon, 2013-03-25 at 14:59 -0400, John W. Linville wrote:
> On Mon, Mar 18, 2013 at 12:51:35PM -0500, Dan Williams wrote:
> > On Mon, 2013-03-18 at 10:23 -0700, Joe Perches wrote:
> > > On Mon, 2013-03-18 at 11:48 -0500, Dan Williams wrote:
> > > > Would be better maintained by somebody who actualy has time for it.
> > > []
> > > > diff --git a/MAINTAINERS b/MAINTAINERS
> > > []
> > > > -MARVELL LIBERTAS WIRELESS DRIVER
> > > > -M:	Dan Williams <dcbw@redhat.com>
> > > > -L:	libertas-dev@lists.infradead.org
> > > > -S:	Maintained
> > > > -F:	drivers/net/wireless/libertas/
> > > 
> > > I think it better to mark it as Orphan
> > > and maybe leave the list.
> > > 
> > > Maybe:
> > > 
> > > MARVELL LIBERTAS WIRELESS DRIVER
> > > L:	libertas-dev@lists.infradead.org
> > > S:	Orphan
> > > F:	drivers/net/wireless/libertas/
> > > 
> > > or
> > > 
> > > MARVELL LIBERTAS WIRELESS DRIVER
> > > S:	Orphan
> > > F:	drivers/net/wireless/libertas/
> > 
> > I can do that; I wasn't quite sure how to do this.  A quick check showed
> > patches that did what mine did, and oddly MAINTAINERS has no section for
> > dropping maintainership that I could quickly find.  If this is what
> > others prefer I'm happy to resubmit?
> 
> I probably would prefer an "Orphan" listing as well...

Ok, will do.

Dan

^ permalink raw reply

* kmem_cache_create(nf_conntrack_expect): Cache name already exists.
From: Dave Jones @ 2013-03-26 14:18 UTC (permalink / raw)
  To: netdev

We had a user report this against a 3.6.6 kernel.
Given the uptimes he usually sees on that box, it may be a while before
he gets a chance to see this again if it hasn't been fixed.

Does this look familiar to anyone ?

Mar 21 04:00:12 kernel: [8176848.470356] nf_conntrack version 0.5.0 (16049 buckets, 64196 max)
Mar 21 04:00:12 kernel: [8176848.471261] kmem_cache_create(nf_conntrack_expect): Cache name already exists.
Mar 21 04:00:12 kernel: [8176848.471794] Pid: 32711, comm: modprobe Not tainted 3.6.6-1.fc16.i686 #1
Mar 21 04:00:12 kernel: [8176848.472321] Call Trace:
Mar 21 04:00:12 kernel: [8176848.472959]  [<c0511b44>] kmem_cache_create+0x144/0x190
Mar 21 04:00:12 kernel: [8176848.473684]  [<f7d8b7e8>] nf_conntrack_expect_init+0xe8/0x120 [nf_conntrack]
Mar 21 04:00:12 kernel: [8176848.474358]  [<f7d89b06>] nf_conntrack_init+0xe6/0x320 [nf_conntrack]
Mar 21 04:00:12 kernel: [8176848.475045]  [<f7d8a164>] nf_conntrack_net_init+0x14/0x170 [nf_conntrack]
Mar 21 04:00:12 kernel: [8176848.475732]  [<f7da9000>] ? 0xf7da8fff
Mar 21 04:00:12 kernel: [8176848.476394]  [<c085c1b9>] ops_init+0x39/0x110
Mar 21 04:00:12 kernel: [8176848.477052]  [<c085c41c>] register_pernet_operations+0xcc/0x140
Mar 21 04:00:12 kernel: [8176848.477686]  [<c085c511>] register_pernet_subsys+0x21/0x40
Mar 21 04:00:12 kernel: [8176848.478348]  [<f7da900d>] nf_conntrack_standalone_init+0xd/0x1000 [nf_conntrack]
Mar 21 04:00:12 kernel: [8176848.478965]  [<c0401124>] do_one_initcall+0x34/0x170
Mar 21 04:00:12 kernel: [8176848.479608]  [<f7da9000>] ? 0xf7da8fff
Mar 21 04:00:12 kernel: [8176848.480227]  [<c049977c>] sys_init_module+0xfcc/0x1cf0
Mar 21 04:00:12 kernel: [8176848.480821]  [<c055b73a>] ? mntput_no_expire+0x3a/0x110
Mar 21 04:00:12 kernel: [8176848.481456]  [<c0958f5f>] sysenter_do_call+0x12/0x28

The user has a cron job that restarts his firewall setup every morning, and this occurred
during that, while apparently things were low on memory.


Mar 21 04:00:12 modprobe: FATAL: Error inserting iptable_nat (/lib/modules/3.6.6-1.fc16.i686/kernel/net/ipv4/netfilter/iptable_nat.ko): Cannot allocate memory

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Serge Hallyn @ 2013-03-26 14:17 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Benoit Lourdelet, Stephen Hemminger, netdev@vger.kernel.org
In-Reply-To: <87wqsu72r6.fsf@xmission.com>

Quoting Eric W. Biederman (ebiederm@xmission.com):
> Specifing the names would dramatically improve your creation
> performance.  It should only take you about 10s for 5000 veth pairs.
> But you have to specify the names.

Thanks, Eric.  I'm going to update lxc to always specify names for
the veth pairs, rather than only when they are requested by the
user's configuration file.

-serge

^ permalink raw reply

* [PATCH] bonding: cleanup unneeded rcu_read_lock()
From: Veaceslav Falico @ 2013-03-26 14:10 UTC (permalink / raw)
  To: netdev; +Cc: vfalico, andy, fubar

bond_resend_igmp_join_requests_delayed() calls bond_resend_igmp_join_requests()
under rcu_read_lock(), while the latter already takes its own rcu_read_lock()
for the whole function. Remove the lock from the _delayed function.

Signed-off-by: Veaceslav Falico <vfalico@redhat.com>
---
 drivers/net/bonding/bond_main.c |    3 +--
 1 files changed, 1 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6bbd90e..11a8cb3 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -796,9 +796,8 @@ static void bond_resend_igmp_join_requests_delayed(struct work_struct *work)
 {
 	struct bonding *bond = container_of(work, struct bonding,
 					    mcast_work.work);
-	rcu_read_lock();
+
 	bond_resend_igmp_join_requests(bond);
-	rcu_read_unlock();
 }
 
 /*
-- 
1.7.1

^ permalink raw reply related

* [PATCH net-next] MAINTAINERS: add netdev list for PTP (IEEE 1588)
From: Jiri Benc @ 2013-03-26 14:01 UTC (permalink / raw)
  To: netdev; +Cc: Richard Cochran

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 MAINTAINERS |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 86c0843..77b3748 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6310,6 +6310,7 @@ F:	drivers/acpi/apei/erst.c
 
 PTP HARDWARE CLOCK SUPPORT
 M:	Richard Cochran <richardcochran@gmail.com>
+L:	netdev@vger.kernel.org
 S:	Maintained
 W:	http://linuxptp.sourceforge.net/
 F:	Documentation/ABI/testing/sysfs-ptp
-- 
1.7.6.5

^ permalink raw reply related

* [PATCH net-next] ptp_pch: fix typo in module parameter description
From: Jiri Benc @ 2013-03-26 13:54 UTC (permalink / raw)
  To: netdev; +Cc: Richard Cochran, Takahiro Shimizu

Signed-off-by: Jiri Benc <jbenc@redhat.com>
---
 drivers/ptp/ptp_pch.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/ptp/ptp_pch.c b/drivers/ptp/ptp_pch.c
index 1367655..e85926b 100644
--- a/drivers/ptp/ptp_pch.c
+++ b/drivers/ptp/ptp_pch.c
@@ -725,7 +725,7 @@ module_exit(ptp_pch_exit);
 
 module_param_string(station, pch_param.station, sizeof pch_param.station, 0444);
 MODULE_PARM_DESC(station,
-	 "IEEE 1588 station address to use - column separated hex values");
+	 "IEEE 1588 station address to use - colon separated hex values");
 
 MODULE_AUTHOR("LAPIS SEMICONDUCTOR, <tshimizu818@gmail.com>");
 MODULE_DESCRIPTION("PTP clock using the EG20T timer");
-- 
1.7.6.5

^ permalink raw reply related

* niu lock-up (Transmit timed out, resetting) and NETDEV WATCHDOG
From: Andrew Brooks @ 2013-03-26 13:46 UTC (permalink / raw)
  To: Linux Net-Dev Mailing List
In-Reply-To: <CAHOfOo21+hSwFrXZuoMppcivcOonhx-m1p-yyZsm6c5UCh0joQ@mail.gmail.com>

Hello

Using niu driver for this card: Oracle/SUN Multithreaded 10-Gigabit
Ethernet Network Controller
after a period (often less than 24 hours) the interface will hang with
errors every 5 seconds
"niu: xxx: eth2: Transmit timed out, resetting"

Sometimes also in syslog are messages
WARNING: at sch_generic:255 dev_watchdog
NETDEV WATCHDOG: eth2 (niu): transmit queue 10 timed out

I've seen this in kernel 3.5.0-26-generic #42~precise1-Ubuntu SMP
but I've not seen it in kernel 3.2.0-38-generic #61-Ubuntu SMP

Is there some change between kernels which has broken the driver
or is the difference elsewhere?

Thanks

Andrew

^ permalink raw reply

* Re: [Eulerkernel] [PATCH] af_unix: dont send SCM_CREDENTIAL when dest socket is NULL
From: Eric Dumazet @ 2013-03-26 13:46 UTC (permalink / raw)
  To: dingtianhong; +Cc: David S. Miller, Eric Dumazet, netdev, Li Zefan, Xinwei Hu
In-Reply-To: <515187F6.4030905@huawei.com>

On Tue, 2013-03-26 at 19:35 +0800, dingtianhong wrote:

> I think if we do not call scm_set_creds(), the credentials would be useless in recvmsg().
> we could remove code:
> 		if (check_creds) {
>                         /* Never glue messages from different writers */
>                         if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
>                             (UNIXCB(skb).cred != siocb->scm->cred))
>                                 break;
>                 } else {
>                         /* Copy credentials */
>                         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
>                         check_creds = 1;
>                 }

Are you paraphrasing me or saying something different ?
 

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric W. Biederman @ 2013-03-26 12:40 UTC (permalink / raw)
  To: Benoit Lourdelet; +Cc: Stephen Hemminger, netdev@vger.kernel.org, Serge Hallyn
In-Reply-To: <CD773D0D.7614%blourdel@juniper.net>

Benoit Lourdelet <blourdel@juniper.net> writes:

> Hello,
>
> I re-tested with the patch and got the following results on a 32x 2Ghz
> core system.
>
> # veth 	add 	delete
> 1000 	36 	34
> 3000 	259 	137
> 4000 	462 	195
> 5000 	729     N/A
>
> The script to create is the following :
> for i in `seq 1 5000`; do
> 	sudo ip link add type veth
> Done

Which performs horribly as I mentioned earlier because you are asking
the kernel to create the names.  If you want performance you need to
specify the names of the network devices you are creating.

aka ip link add a$i type veth name b$i

> The script to delete:
> for d in /sys/class/net/veth*; do
> 	ip link del `basename $d` 2>/dev/null || true
> Done
>
> There is a very good improvement in deletion.
>
>
>
> iproute2 does not seem to be well multithreaded, as I get time divided by a
> factor of 2 with an 8x 3.2 GHz core system.

All netlink traffic and all network stack configuration is serialized by
the rtnl_lock in the kernel.  This is the slow path in the kernel, not
the fast path.

> I don't know if that is the improvement you expected?
>
> Would the iproute2 redesign you mentioned help improve performance even
> further ?

Specifying the names would dramatically improve your creation
performance.  It should only take you about 10s for 5000 veth pairs.
But you have to specify the names.

Anyway I have exhausted my time, and inclination in this matter.  Good
luck with whatever your problem is.

Eric

^ permalink raw reply

* Re: [PATCH v2 net-next 08/12] 6lowpan: store fragment tag values per device instead of net stack wide
From: Sergei Shtylyov @ 2013-03-26 12:38 UTC (permalink / raw)
  To: Tony Cheneau
  Cc: David S. Miller, Eric Dumazet, Alan Ott, Alexander Smirnov,
	netdev, linux-zigbee-devel
In-Reply-To: <1364270372-19430-9-git-send-email-tony.cheneau@amnesiak.org>

Hello.

On 26-03-2013 7:59, Tony Cheneau wrote:

> Signed-off-by: Tony Cheneau <tony.cheneau@amnesiak.org>
> ---
>   net/ieee802154/6lowpan.c | 9 +++++----
>   1 file changed, 5 insertions(+), 4 deletions(-)

> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
> index 61eee9d..f952451 100644
> --- a/net/ieee802154/6lowpan.c
> +++ b/net/ieee802154/6lowpan.c
> @@ -104,6 +104,7 @@ static const u8 lowpan_llprefix[] = {0xfe, 0x80};
>   struct lowpan_dev_info {
>   	struct net_device	*real_dev; /* real WPAN device ptr */
>   	struct mutex		dev_list_mtx; /* mutex for list ops */
> +	unsigned short fragment_tag;

    Small formatting nit: align field name with the others above please.

WBR, Sergei

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Benoit Lourdelet @ 2013-03-26 11:51 UTC (permalink / raw)
  To: Stephen Hemminger, Eric W. Biederman; +Cc: netdev@vger.kernel.org, Serge Hallyn
In-Reply-To: <CAOaVG17Cjj3epC2LRkDVdoqNWno=XjH4nhfiNVeceS=0d=Nyrw@mail.gmail.com>

Hello,

I re-tested with the patch and got the following results on a 32x 2Ghz
core system.

# veth 	add 	delete
1000 	36 	34
3000 	259 	137
4000 	462 	195
5000 	729     N/A

The script to create is the following :
for i in `seq 1 5000`; do
	sudo ip link add type veth
Done


The script to delete:
for d in /sys/class/net/veth*; do
	ip link del `basename $d` 2>/dev/null || true
Done

There is a very good improvement in deletion.



iproute2 does not seem to be well multithreaded, as I get time divided by a
factor of 2 with an 8x 3.2 GHz core system.

I don't know if that is the improvement you expected?

Would the iproute2 redesign you mentioned help improve performance even
further ?


As a reference : Iproute2 baseline w/o patch:

# veth 	add 	delete

1000 	57 	70
2000 	193 	250
3000 	435 	510
4000 	752 	824
5000 	1123 	1185

Regards

Benoit




On 22/03/2013 23:27, "Stephen Hemminger" <stephen@networkplumber.org>
wrote:

>The whole ifindex map is a design mistake at this point.
>Better off to do a lazy cache or something like that.
>
>
>On Fri, Mar 22, 2013 at 3:23 PM, Eric W. Biederman
><ebiederm@xmission.com> wrote:
>>
>> Because ip link add, set, and delete map the interface name to the
>> interface index by dumping all of the interfaces before performing
>> their respective commands.  Operations that should be constant time
>> slow down when lots of network interfaces are in use.  Resulting
>> in O(N^2) time to work with O(N) devices.
>>
>> Make the work that iproute does constant time by passing the interface
>> name to the kernel instead.
>>
>> In small scale testing on my system this shows dramatic performance
>> increases of ip link add from 120s to just 11s to add 5000 network
>> devices.  And from longer than I cared to wait to just 58s to delete
>> all of those interfaces again.
>>
>> Cc: Serge Hallyn <serge.hallyn@ubuntu.com>
>> Reported-by: Benoit Lourdelet <blourdel@juniper.net>
>> Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
>> ---
>>
>> I think I am bungling the case where people specify an ifindex as ifNNNN
>> but does anyone care?
>>
>>  ip/iplink.c |   19 +------------------
>>  1 files changed, 1 insertions(+), 18 deletions(-)
>>
>> diff --git a/ip/iplink.c b/ip/iplink.c
>> index ad33611..6dffbf0 100644
>> --- a/ip/iplink.c
>> +++ b/ip/iplink.c
>> @@ -533,8 +533,6 @@ static int iplink_modify(int cmd, unsigned int
>>flags, int argc, char **argv)
>>                 }
>>         }
>>
>> -       ll_init_map(&rth);
>> -
>>         if (!(flags & NLM_F_CREATE)) {
>>                 if (!dev) {
>>                         fprintf(stderr, "Not enough information:
>>\"dev\" "
>> @@ -542,27 +540,12 @@ static int iplink_modify(int cmd, unsigned int
>>flags, int argc, char **argv)
>>                         exit(-1);
>>                 }
>>
>> -               req.i.ifi_index = ll_name_to_index(dev);
>> -               if (req.i.ifi_index == 0) {
>> -                       fprintf(stderr, "Cannot find device \"%s\"\n",
>>dev);
>> -                       return -1;
>> -               }
>> +               name = dev;
>>         } else {
>>                 /* Allow "ip link add dev" and "ip link add name" */
>>                 if (!name)
>>                         name = dev;
>>
>> -               if (link) {
>> -                       int ifindex;
>> -
>> -                       ifindex = ll_name_to_index(link);
>> -                       if (ifindex == 0) {
>> -                               fprintf(stderr, "Cannot find device
>>\"%s\"\n",
>> -                                       link);
>> -                               return -1;
>> -                       }
>> -                       addattr_l(&req.n, sizeof(req), IFLA_LINK,
>>&ifindex, 4);
>> -               }
>>         }
>>
>>         if (name) {
>> --
>> 1.7.5.4
>>
>

^ permalink raw reply

* Re: [Linux-zigbee-devel] [PATCH v2 net-next 05/12] 6lowpan: use short IEEE 802.15.4 addresses for broadcast destination
From: Alexander Aring @ 2013-03-26 11:56 UTC (permalink / raw)
  To: Tony Cheneau
  Cc: David S. Miller, Eric Dumazet, netdev, linux-zigbee-devel,
	Alan Ott
In-Reply-To: <1364270372-19430-6-git-send-email-tony.cheneau@amnesiak.org>

Hi Tony,

On Mon, Mar 25, 2013 at 11:59:25PM -0400, Tony Cheneau wrote:
> The IEEE 802.15.4 standard uses the 0xFFFF short address (2 bytes) for message
> broadcasting.
> 
> Signed-off-by: Tony Cheneau <tony.cheneau@amnesiak.org>
> ---
>  net/ieee802154/6lowpan.c | 23 +++++++++++++++--------
>  1 file changed, 15 insertions(+), 8 deletions(-)
> 
> diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
> index e7f61de..0eebb96 100644
> --- a/net/ieee802154/6lowpan.c
> +++ b/net/ieee802154/6lowpan.c
> @@ -572,21 +572,28 @@ static int lowpan_header_create(struct sk_buff *skb,
>  	 * this isn't implemented in mainline yet, so currently we assign 0xff
>  	 */
>  	{
> +		mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
> +
>  		/* prepare wpan address data */
>  		sa.addr_type = IEEE802154_ADDR_LONG;
>  		sa.pan_id = 0xff;
> -
> -		da.addr_type = IEEE802154_ADDR_LONG;
> -		da.pan_id = 0xff;
> -
> -		memcpy(&(da.hwaddr), daddr, 8);
>  		memcpy(&(sa.hwaddr), saddr, 8);
>  
> -		mac_cb(skb)->flags = IEEE802154_FC_TYPE_DATA;
> +		da.pan_id = 0xff;
> +		/*
> +		 * if the destination address is the broadcast address, use the
> +		 * corresponding short address
> +		 */
> +		if (lowpan_is_addr_broadcast(daddr)) {
> +			da.addr_type = IEEE802154_ADDR_SHORT;
> +			da.short_addr = IEEE802154_ADDR_BROADCAST;
> +		} else {
> +			da.addr_type = IEEE802154_ADDR_LONG;
> +			memcpy(&(da.hwaddr), daddr, 8);

It's some nitpick here.
Maybe it's better to use IEEE802154_ADDR_LEN instead of 8.

I mean we have this define, and we should use it if we mean the
ieee802154 address length.

Alex

^ permalink raw reply

* Re: [PATCH v2 net-next 00/12] 6lowpan: Some more bug fixes
From: Alan Ott @ 2013-03-26 11:48 UTC (permalink / raw)
  To: David S. Miller
  Cc: Tony Cheneau, Eric Dumazet, Alexander Smirnov, netdev,
	linux-zigbee-devel
In-Reply-To: <1364270372-19430-1-git-send-email-tony.cheneau@amnesiak.org>

On 03/25/2013 11:59 PM, Tony Cheneau wrote:
> This patchset fixes serious bugs within the 6LoWPAN modules. I wrote a script
> (available at [1]) to prove the issues are real.  One can try and see that
> without these patches, most of the test fail (e.g. packet dropped by the
> receiver or node crashing). With all patches applied, all tests succeed. The
> tests themselves are very basic: sending ICMP packets, sending UDP packets,
> sending TCP packets, varying size of the packets. This actually triggers some
> 6LoWPAN specific code, namely fragmentation, packet reassembly and header
> compression.
> 
> This code passed the checkpatch.pl tool with a few warnings, that I believe
> are OK. It should apply cleanly on the latest net-next.
> 

I have been running some form of this patchset since October, and
have reviewed it several times.

Reviewed-by: Alan Ott <alan@signal11.us>
Tested-by: Alan Ott <alan@signal11.us>


> Regards,
> 	Tony Cheneau
> 
> [1]: https://github.com/tcheneau/linux802154-regression-tests
> 
> Tony Cheneau (12):
>   6lowpan: lowpan_is_iid_16_bit_compressable() does not detect
>     compressible address correctly
>   6lowpan: next header is not properly set upon decompression of a UDP
>     header.
>   6lowpan: always enable link-layer acknowledgments
>   mac802154: turn on ACK when enabled by the upper layers
>   6lowpan: use short IEEE 802.15.4 addresses for broadcast destination
>   6lowpan: fix first fragment (FRAG1) handling
>   6lowpan: add debug messages for 6LoWPAN fragmentation
>   6lowpan: store fragment tag values per device instead of net stack
>     wide
>   mac802154: re-introduce mac802154_dev_get_dsn()
>   6lowpan: obtain IEEE802.15.4 sequence number from the MAC layer
>   6lowpan: use the PANID provided by the device instead of a static
>     value
>   6lowpan: modify udp compression/uncompression to match the standard
> 
>  net/ieee802154/6lowpan.c  | 136 +++++++++++++++++++++++++++++++++++++---------
>  net/ieee802154/6lowpan.h  |   7 ++-
>  net/mac802154/mac802154.h |   1 +
>  net/mac802154/mac_cmd.c   |   1 +
>  net/mac802154/mib.c       |   9 +++
>  net/mac802154/wpan.c      |   2 +
>  6 files changed, 127 insertions(+), 29 deletions(-)
> 

^ permalink raw reply

* RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before copying
From: Paul Durrant @ 2013-03-26 11:38 UTC (permalink / raw)
  To: James Harper, Wei Liu, David Vrabel
  Cc: Ian Campbell, Wei Liu, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, xen-devel@lists.xen.org,
	annie.li@oracle.com
In-Reply-To: <6035A0D088A63A46850C3988ED045A4B3880B6AE@BITCOM1.int.sbss.com.au>

> -----Original Message-----
> From: James Harper [mailto:james.harper@bendigoit.com.au]
> Sent: 26 March 2013 11:29
> To: Paul Durrant; Wei Liu; David Vrabel
> Cc: Ian Campbell; Wei Liu; netdev@vger.kernel.org;
> konrad.wilk@oracle.com; xen-devel@lists.xen.org; annie.li@oracle.com
> Subject: RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before
> copying
> 
> > > As stated previously, I've observed windows issuing staggering numbers
> of
> > > buffers to NDIS miniport drivers, so you will need to coalesce in a
> windows
> > > driver anyway. I'm not sure what the break even point is but I think it's
> safe
> > > to say that in the choice between using 1000 (worst case) ring slots (with
> > > the
> > > resulting mapping overheads) and coalescing in the frontend, coalescing
> is
> > > going to be the better option.
> > >
> >
> > Oh quite, if the backend is mapping and not copying then coalescing in the
> > frontend is the right way to go. I guess coalescing once the frag count
> > reaches a full ring count is probably necessary (since we can't push a partial
> > packet) but it would be nice not to have to do it if the backend is going to
> > copy anyway.
> >
> 
> For a 9k packet with 100 frags (not a common case, but an example), what is
> the cost of mapping those 100 frags into the backend vs coalescing to three
> pages in the frontend and mapping those?
> 
> I may be misremembering but wasn't there a patch floating around for
> persistent mapping to avoid some of this overhead? (not applicable here but
> I thought it meant that the cost wasn't insignificant)
> 

The current version of netback does not map, it always grant-copies.

  Paul

^ permalink raw reply

* Re: [Eulerkernel] [PATCH] af_unix: dont send SCM_CREDENTIAL when dest socket is NULL
From: dingtianhong @ 2013-03-26 11:35 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David S. Miller, Eric Dumazet, netdev, Li Zefan, Xinwei Hu
In-Reply-To: <1364272360.1716.11.camel@edumazet-glaptop>

On 2013/3/26 12:32, Eric Dumazet wrote:
> On Tue, 2013-03-26 at 11:08 +0800, dingtianhong wrote:
>> On 2013/3/25 22:04, Eric Dumazet wrote:
>>> On Mon, 2013-03-25 at 18:28 +0800, dingtianhong wrote:
>>>> SCM_CREDENTIALS should apply to write() syscalls only if either the source or destination
>>>> socket asserted SOCK_PASSCRED. The original implementation in maybe_add_creds is wrong,
>>>> and breaks several LSB testcases (e.g. /tset/LSB.os/network/recvfrom/T.recvfrom).
>>>>
>>>> Originally-authored-by: Karel Srot <ksrot@redhat.com>
>>>> Signed-off-by: Ding Tianhong <dingtianhong@huawei.com>
>>>> ---
>>>>    net/unix/af_unix.c | 4 ++--
>>>>    1 file changed, 2 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
>>>> index 51be64f..99189fd 100644
>>>> --- a/net/unix/af_unix.c
>>>> +++ b/net/unix/af_unix.c
>>>> @@ -1413,8 +1413,8 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
>>>>           if (UNIXCB(skb).cred)
>>>>                   return;
>>>>           if (test_bit(SOCK_PASSCRED, &sock->flags) ||
>>>> -           !other->sk_socket ||
>>>> -           test_bit(SOCK_PASSCRED, &other->sk_socket->flags)) {
>>>> +           (other->sk_socket &&
>>>> +           test_bit(SOCK_PASSCRED, &other->sk_socket->flags))) {
>>>>                   UNIXCB(skb).pid  = get_pid(task_tgid(current));
>>>>                   UNIXCB(skb).cred = get_current_cred();
>>>>           }
>>>
>>> I am not sure why adding credentials if other->sk_socket is NULL could
>>> break an application ?
>> The bug has been reported in Bugzilla: https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3523
>>
> 
> OK
> 
>>>
>>> This was the case before commit introducing this code.
>>
>> Commit 16e5726269 (af_unix: dont send SCM_CREDENTIALS by default) may have introduced the problem.
>>
> 
> So the problem is that two messages have different credentials,
> because other->sk_socket changed between first and second message.
> 
> and unix_stream_recvmsg() has the following check :
> 
>                 if (check_creds) {
>                         /* Never glue messages from different writers */
>                         if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
>                             (UNIXCB(skb).cred != siocb->scm->cred))
>                                 break;
>                 } else {
>                         /* Copy credentials */
>                         scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
>                         check_creds = 1;
>                 }
> 
> In the case the receiver doesnt care at all (using recvfrom(), not recvmsg()), 
> we probably should not even call scm_set_creds() and avoid extra refcounting.
> 
I think if we do not call scm_set_creds(), the credentials would be useless in recvmsg().
we could remove code:
		if (check_creds) {
                        /* Never glue messages from different writers */
                        if ((UNIXCB(skb).pid  != siocb->scm->pid) ||
                            (UNIXCB(skb).cred != siocb->scm->cred))
                                break;
                } else {
                        /* Copy credentials */
                        scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred);
                        check_creds = 1;
                }
> 
> 
> 
> .
> 

^ permalink raw reply

* Re: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before copying
From: Wei Liu @ 2013-03-26 11:29 UTC (permalink / raw)
  To: David Vrabel
  Cc: Wei Liu, Ian Campbell, konrad.wilk@oracle.com,
	netdev@vger.kernel.org, xen-devel@lists.xen.org,
	annie.li@oracle.com
In-Reply-To: <515182E2.50103@citrix.com>

On Tue, Mar 26, 2013 at 11:13:38AM +0000, David Vrabel wrote:
> >>
> >> Separately, it may be sensible for the backend to drop packets with more
> >> frags than max-slots-per-frame up to some threshold where anything more
> >> is considered malicious (i.e., 1 - 18 slots is a valid packet, 19-20 are
> >> dropped and 21 or more is a fatal error).
> >>
> > 
> > Why drop the packet when we are able to process it? Frontend cannot know
> > it has crossed the line anyway.
> 
> Because it's a change to the protocol and we do not want to do this for
> a regression fix.
> 

If I understand correctly the regression you talked about was introduced
by harsh punishment in XSA-39? If so, this is the patch you need to fix
that. Frontend only knows that it has connectivity or not. This patch
guarantees that the old netfront with larger MAX_SKB_FRAGS still sees the
same thing from its point of view.  Netfront cannot know the
intermediate state between 18 and 20.

> As a separate fix we can consider increasing the number of slots
> per-packet once there is a mechanism to report this to the front end.
> 

Sure, that's on my TODO list.


Wei.

^ permalink raw reply

* RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before copying
From: James Harper @ 2013-03-26 11:29 UTC (permalink / raw)
  To: Paul Durrant, Wei Liu, David Vrabel
  Cc: Ian Campbell, Wei Liu, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, xen-devel@lists.xen.org,
	annie.li@oracle.com
In-Reply-To: <291EDFCB1E9E224A99088639C4762022013F7D8E0E06@LONPMAILBOX01.citrite.net>

> > As stated previously, I've observed windows issuing staggering numbers of
> > buffers to NDIS miniport drivers, so you will need to coalesce in a windows
> > driver anyway. I'm not sure what the break even point is but I think it's safe
> > to say that in the choice between using 1000 (worst case) ring slots (with
> > the
> > resulting mapping overheads) and coalescing in the frontend, coalescing is
> > going to be the better option.
> >
> 
> Oh quite, if the backend is mapping and not copying then coalescing in the
> frontend is the right way to go. I guess coalescing once the frag count
> reaches a full ring count is probably necessary (since we can't push a partial
> packet) but it would be nice not to have to do it if the backend is going to
> copy anyway.
> 

For a 9k packet with 100 frags (not a common case, but an example), what is the cost of mapping those 100 frags into the backend vs coalescing to three pages in the frontend and mapping those?

I may be misremembering but wasn't there a patch floating around for persistent mapping to avoid some of this overhead? (not applicable here but I thought it meant that the cost wasn't insignificant)

James

^ permalink raw reply

* RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before copying
From: David Laight @ 2013-03-26 11:27 UTC (permalink / raw)
  To: James Harper, Paul Durrant, Wei Liu, David Vrabel
  Cc: Ian Campbell, Wei Liu, netdev, konrad.wilk, xen-devel, annie.li
In-Reply-To: <6035A0D088A63A46850C3988ED045A4B3880B4F4@BITCOM1.int.sbss.com.au>

> As stated previously, I've observed windows issuing staggering
> numbers of buffers to NDIS miniport drivers, so you will need
> to coalesce in a windows driver anyway. I'm not sure what the
> break even point is but I think it's safe to say that in the
> choice between using 1000 (worst case) ring slots (with the
> resulting mapping overheads) and coalescing in the frontend,
> coalescing is going to be the better option.

A long time ago we did some calculation on a sparc mbus/sbus
system (that has an iommu requiring setup for dma) and got
a breakeven point of (about) 1k.
(And I'm not sure we arrange to do aligned copies.)

Clearly that isn't directly relevant here...

It is even likely that the ethernet chips will underrun
if requested to do too many ring operations - especially
at their maximum speed.
I guess none of the modern ones require the first fragment
to be at least 100 bytes in order to guarantee retransmission
after a collision.

	David

^ permalink raw reply

* RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before copying
From: Paul Durrant @ 2013-03-26 11:24 UTC (permalink / raw)
  To: James Harper, Wei Liu, David Vrabel
  Cc: Ian Campbell, Wei Liu, netdev@vger.kernel.org,
	konrad.wilk@oracle.com, xen-devel@lists.xen.org,
	annie.li@oracle.com
In-Reply-To: <6035A0D088A63A46850C3988ED045A4B3880B4F4@BITCOM1.int.sbss.com.au>

> -----Original Message-----
> From: James Harper [mailto:james.harper@bendigoit.com.au]
> Sent: 26 March 2013 11:01
> To: Paul Durrant; Wei Liu; David Vrabel
> Cc: Ian Campbell; Wei Liu; netdev@vger.kernel.org;
> konrad.wilk@oracle.com; xen-devel@lists.xen.org; annie.li@oracle.com
> Subject: RE: [Xen-devel] [PATCH 5/6] xen-netback: coalesce slots before
> copying
> 
> > > Because the check is >= MAX_SKB_FRAGS originally and James Harper
> told
> > > me that "Windows stops counting on 20".
> > >
> >
> > For the Citrix PV drivers I lifted the #define of MAX_SKB_FRAGS from the
> > dom0 kernel (i.e. 18). If a packet coming from the stack has more than that
> > number of fragments then it's copied and coalesced. The value advertised
> > for TSO size is chosen such that a maximally sized TSO will always fit in 18
> > fragments after coalescing but (since this is Windows) the drivers don't
> trust
> > the stack to stick to that limit and will drop a packet if it won't fit.
> >
> > It seems reasonable that, since the backend is copying anyway, that it
> should
> > handle any fragment list coming from the frontend that it can. This would
> > allow the copy-and-coalesce code to be removed from the frontend (and
> the
> > double-copy avoided). If there is a maximum backend packet size though
> > then I think this needs to be advertised to the frontend. The backend
> should
> > clearly bin packets coming from the frontend that exceed that limit but
> > advertising that limit in xenstore allows the frontend to choose the right
> TSO
> > maximum size to advertise to its stack, rather than having to make it based
> > on some historical value that actually has little meaning (in the absence of
> > grant mapping).
> >
> 
> As stated previously, I've observed windows issuing staggering numbers of
> buffers to NDIS miniport drivers, so you will need to coalesce in a windows
> driver anyway. I'm not sure what the break even point is but I think it's safe
> to say that in the choice between using 1000 (worst case) ring slots (with the
> resulting mapping overheads) and coalescing in the frontend, coalescing is
> going to be the better option.
> 

Oh quite, if the backend is mapping and not copying then coalescing in the frontend is the right way to go. I guess coalescing once the frag count reaches a full ring count is probably necessary (since we can't push a partial packet) but it would be nice not to have to do it if the backend is going to copy anyway.

  Paul

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox