Netdev List
 help / color / mirror / Atom feed
* [PATCH 1/2] sh_eth: move data from header file to driver
From: Sergei Shtylyov @ 2013-03-28 21:48 UTC (permalink / raw)
  To: netdev; +Cc: nobuhiro.iwamatsu.yj, linux-sh

The driver's header file contains initialized register offset tables which (like
any data definitions), of course, have no business being there.  Move them to
the driver's body, somewhat beautifying the initializers, while at it...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
The patch is against David Miller's 'net-next.git' repo.

 drivers/net/ethernet/renesas/sh_eth.c |  218 +++++++++++++++++++++++++++++++++
 drivers/net/ethernet/renesas/sh_eth.h |  219 ----------------------------------
 2 files changed, 218 insertions(+), 219 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -49,6 +49,224 @@
 		NETIF_MSG_RX_ERR| \
 		NETIF_MSG_TX_ERR)
 
+static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
+	[EDSR]		= 0x0000,
+	[EDMR]		= 0x0400,
+	[EDTRR]		= 0x0408,
+	[EDRRR]		= 0x0410,
+	[EESR]		= 0x0428,
+	[EESIPR]	= 0x0430,
+	[TDLAR]		= 0x0010,
+	[TDFAR]		= 0x0014,
+	[TDFXR]		= 0x0018,
+	[TDFFR]		= 0x001c,
+	[RDLAR]		= 0x0030,
+	[RDFAR]		= 0x0034,
+	[RDFXR]		= 0x0038,
+	[RDFFR]		= 0x003c,
+	[TRSCER]	= 0x0438,
+	[RMFCR]		= 0x0440,
+	[TFTR]		= 0x0448,
+	[FDR]		= 0x0450,
+	[RMCR]		= 0x0458,
+	[RPADIR]	= 0x0460,
+	[FCFTR]		= 0x0468,
+	[CSMR]		= 0x04E4,
+
+	[ECMR]		= 0x0500,
+	[ECSR]		= 0x0510,
+	[ECSIPR]	= 0x0518,
+	[PIR]		= 0x0520,
+	[PSR]		= 0x0528,
+	[PIPR]		= 0x052c,
+	[RFLR]		= 0x0508,
+	[APR]		= 0x0554,
+	[MPR]		= 0x0558,
+	[PFTCR]		= 0x055c,
+	[PFRCR]		= 0x0560,
+	[TPAUSER]	= 0x0564,
+	[GECMR]		= 0x05b0,
+	[BCULR]		= 0x05b4,
+	[MAHR]		= 0x05c0,
+	[MALR]		= 0x05c8,
+	[TROCR]		= 0x0700,
+	[CDCR]		= 0x0708,
+	[LCCR]		= 0x0710,
+	[CEFCR]		= 0x0740,
+	[FRECR]		= 0x0748,
+	[TSFRCR]	= 0x0750,
+	[TLFRCR]	= 0x0758,
+	[RFCR]		= 0x0760,
+	[CERCR]		= 0x0768,
+	[CEECR]		= 0x0770,
+	[MAFCR]		= 0x0778,
+	[RMII_MII]	= 0x0790,
+
+	[ARSTR]		= 0x0000,
+	[TSU_CTRST]	= 0x0004,
+	[TSU_FWEN0]	= 0x0010,
+	[TSU_FWEN1]	= 0x0014,
+	[TSU_FCM]	= 0x0018,
+	[TSU_BSYSL0]	= 0x0020,
+	[TSU_BSYSL1]	= 0x0024,
+	[TSU_PRISL0]	= 0x0028,
+	[TSU_PRISL1]	= 0x002c,
+	[TSU_FWSL0]	= 0x0030,
+	[TSU_FWSL1]	= 0x0034,
+	[TSU_FWSLC]	= 0x0038,
+	[TSU_QTAG0]	= 0x0040,
+	[TSU_QTAG1]	= 0x0044,
+	[TSU_FWSR]	= 0x0050,
+	[TSU_FWINMK]	= 0x0054,
+	[TSU_ADQT0]	= 0x0048,
+	[TSU_ADQT1]	= 0x004c,
+	[TSU_VTAG0]	= 0x0058,
+	[TSU_VTAG1]	= 0x005c,
+	[TSU_ADSBSY]	= 0x0060,
+	[TSU_TEN]	= 0x0064,
+	[TSU_POST1]	= 0x0070,
+	[TSU_POST2]	= 0x0074,
+	[TSU_POST3]	= 0x0078,
+	[TSU_POST4]	= 0x007c,
+	[TSU_ADRH0]	= 0x0100,
+	[TSU_ADRL0]	= 0x0104,
+	[TSU_ADRH31]	= 0x01f8,
+	[TSU_ADRL31]	= 0x01fc,
+
+	[TXNLCR0]	= 0x0080,
+	[TXALCR0]	= 0x0084,
+	[RXNLCR0]	= 0x0088,
+	[RXALCR0]	= 0x008c,
+	[FWNLCR0]	= 0x0090,
+	[FWALCR0]	= 0x0094,
+	[TXNLCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a0,
+	[RXNLCR1]	= 0x00a8,
+	[RXALCR1]	= 0x00ac,
+	[FWNLCR1]	= 0x00b0,
+	[FWALCR1]	= 0x00b4,
+};
+
+static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = {
+	[ECMR]		= 0x0100,
+	[RFLR]		= 0x0108,
+	[ECSR]		= 0x0110,
+	[ECSIPR]	= 0x0118,
+	[PIR]		= 0x0120,
+	[PSR]		= 0x0128,
+	[RDMLR]		= 0x0140,
+	[IPGR]		= 0x0150,
+	[APR]		= 0x0154,
+	[MPR]		= 0x0158,
+	[TPAUSER]	= 0x0164,
+	[RFCF]		= 0x0160,
+	[TPAUSECR]	= 0x0168,
+	[BCFRR]		= 0x016c,
+	[MAHR]		= 0x01c0,
+	[MALR]		= 0x01c8,
+	[TROCR]		= 0x01d0,
+	[CDCR]		= 0x01d4,
+	[LCCR]		= 0x01d8,
+	[CNDCR]		= 0x01dc,
+	[CEFCR]		= 0x01e4,
+	[FRECR]		= 0x01e8,
+	[TSFRCR]	= 0x01ec,
+	[TLFRCR]	= 0x01f0,
+	[RFCR]		= 0x01f4,
+	[MAFCR]		= 0x01f8,
+	[RTRATE]	= 0x01fc,
+
+	[EDMR]		= 0x0000,
+	[EDTRR]		= 0x0008,
+	[EDRRR]		= 0x0010,
+	[TDLAR]		= 0x0018,
+	[RDLAR]		= 0x0020,
+	[EESR]		= 0x0028,
+	[EESIPR]	= 0x0030,
+	[TRSCER]	= 0x0038,
+	[RMFCR]		= 0x0040,
+	[TFTR]		= 0x0048,
+	[FDR]		= 0x0050,
+	[RMCR]		= 0x0058,
+	[TFUCR]		= 0x0064,
+	[RFOCR]		= 0x0068,
+	[FCFTR]		= 0x0070,
+	[RPADIR]	= 0x0078,
+	[TRIMD]		= 0x007c,
+	[RBWAR]		= 0x00c8,
+	[RDFAR]		= 0x00cc,
+	[TBRAR]		= 0x00d4,
+	[TDFAR]		= 0x00d8,
+};
+
+static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
+	[ECMR]		= 0x0160,
+	[ECSR]		= 0x0164,
+	[ECSIPR]	= 0x0168,
+	[PIR]		= 0x016c,
+	[MAHR]		= 0x0170,
+	[MALR]		= 0x0174,
+	[RFLR]		= 0x0178,
+	[PSR]		= 0x017c,
+	[TROCR]		= 0x0180,
+	[CDCR]		= 0x0184,
+	[LCCR]		= 0x0188,
+	[CNDCR]		= 0x018c,
+	[CEFCR]		= 0x0194,
+	[FRECR]		= 0x0198,
+	[TSFRCR]	= 0x019c,
+	[TLFRCR]	= 0x01a0,
+	[RFCR]		= 0x01a4,
+	[MAFCR]		= 0x01a8,
+	[IPGR]		= 0x01b4,
+	[APR]		= 0x01b8,
+	[MPR]		= 0x01bc,
+	[TPAUSER]	= 0x01c4,
+	[BCFR]		= 0x01cc,
+
+	[ARSTR]		= 0x0000,
+	[TSU_CTRST]	= 0x0004,
+	[TSU_FWEN0]	= 0x0010,
+	[TSU_FWEN1]	= 0x0014,
+	[TSU_FCM]	= 0x0018,
+	[TSU_BSYSL0]	= 0x0020,
+	[TSU_BSYSL1]	= 0x0024,
+	[TSU_PRISL0]	= 0x0028,
+	[TSU_PRISL1]	= 0x002c,
+	[TSU_FWSL0]	= 0x0030,
+	[TSU_FWSL1]	= 0x0034,
+	[TSU_FWSLC]	= 0x0038,
+	[TSU_QTAGM0]	= 0x0040,
+	[TSU_QTAGM1]	= 0x0044,
+	[TSU_ADQT0]	= 0x0048,
+	[TSU_ADQT1]	= 0x004c,
+	[TSU_FWSR]	= 0x0050,
+	[TSU_FWINMK]	= 0x0054,
+	[TSU_ADSBSY]	= 0x0060,
+	[TSU_TEN]	= 0x0064,
+	[TSU_POST1]	= 0x0070,
+	[TSU_POST2]	= 0x0074,
+	[TSU_POST3]	= 0x0078,
+	[TSU_POST4]	= 0x007c,
+
+	[TXNLCR0]	= 0x0080,
+	[TXALCR0]	= 0x0084,
+	[RXNLCR0]	= 0x0088,
+	[RXALCR0]	= 0x008c,
+	[FWNLCR0]	= 0x0090,
+	[FWALCR0]	= 0x0094,
+	[TXNLCR1]	= 0x00a0,
+	[TXALCR1]	= 0x00a0,
+	[RXNLCR1]	= 0x00a8,
+	[RXALCR1]	= 0x00ac,
+	[FWNLCR1]	= 0x00b0,
+	[FWALCR1]	= 0x00b4,
+
+	[TSU_ADRH0]	= 0x0100,
+	[TSU_ADRL0]	= 0x0104,
+	[TSU_ADRL31]	= 0x01fc,
+};
+
 #if defined(CONFIG_CPU_SUBTYPE_SH7734) || \
 	defined(CONFIG_CPU_SUBTYPE_SH7763) || \
 	defined(CONFIG_ARCH_R8A7740)
Index: net-next/drivers/net/ethernet/renesas/sh_eth.h
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.h
+++ net-next/drivers/net/ethernet/renesas/sh_eth.h
@@ -156,225 +156,6 @@ enum {
 	SH_ETH_MAX_REGISTER_OFFSET,
 };
 
-static const u16 sh_eth_offset_gigabit[SH_ETH_MAX_REGISTER_OFFSET] = {
-	[EDSR]	= 0x0000,
-	[EDMR]	= 0x0400,
-	[EDTRR]	= 0x0408,
-	[EDRRR]	= 0x0410,
-	[EESR]	= 0x0428,
-	[EESIPR]	= 0x0430,
-	[TDLAR]	= 0x0010,
-	[TDFAR]	= 0x0014,
-	[TDFXR]	= 0x0018,
-	[TDFFR]	= 0x001c,
-	[RDLAR]	= 0x0030,
-	[RDFAR]	= 0x0034,
-	[RDFXR]	= 0x0038,
-	[RDFFR]	= 0x003c,
-	[TRSCER]	= 0x0438,
-	[RMFCR]	= 0x0440,
-	[TFTR]	= 0x0448,
-	[FDR]	= 0x0450,
-	[RMCR]	= 0x0458,
-	[RPADIR]	= 0x0460,
-	[FCFTR]	= 0x0468,
-	[CSMR] = 0x04E4,
-
-	[ECMR]	= 0x0500,
-	[ECSR]	= 0x0510,
-	[ECSIPR]	= 0x0518,
-	[PIR]	= 0x0520,
-	[PSR]	= 0x0528,
-	[PIPR]	= 0x052c,
-	[RFLR]	= 0x0508,
-	[APR]	= 0x0554,
-	[MPR]	= 0x0558,
-	[PFTCR]	= 0x055c,
-	[PFRCR]	= 0x0560,
-	[TPAUSER]	= 0x0564,
-	[GECMR]	= 0x05b0,
-	[BCULR]	= 0x05b4,
-	[MAHR]	= 0x05c0,
-	[MALR]	= 0x05c8,
-	[TROCR]	= 0x0700,
-	[CDCR]	= 0x0708,
-	[LCCR]	= 0x0710,
-	[CEFCR]	= 0x0740,
-	[FRECR]	= 0x0748,
-	[TSFRCR]	= 0x0750,
-	[TLFRCR]	= 0x0758,
-	[RFCR]	= 0x0760,
-	[CERCR]	= 0x0768,
-	[CEECR]	= 0x0770,
-	[MAFCR]	= 0x0778,
-	[RMII_MII] =  0x0790,
-
-	[ARSTR]	= 0x0000,
-	[TSU_CTRST]	= 0x0004,
-	[TSU_FWEN0]	= 0x0010,
-	[TSU_FWEN1]	= 0x0014,
-	[TSU_FCM]	= 0x0018,
-	[TSU_BSYSL0]	= 0x0020,
-	[TSU_BSYSL1]	= 0x0024,
-	[TSU_PRISL0]	= 0x0028,
-	[TSU_PRISL1]	= 0x002c,
-	[TSU_FWSL0]	= 0x0030,
-	[TSU_FWSL1]	= 0x0034,
-	[TSU_FWSLC]	= 0x0038,
-	[TSU_QTAG0]	= 0x0040,
-	[TSU_QTAG1]	= 0x0044,
-	[TSU_FWSR]	= 0x0050,
-	[TSU_FWINMK]	= 0x0054,
-	[TSU_ADQT0]	= 0x0048,
-	[TSU_ADQT1]	= 0x004c,
-	[TSU_VTAG0]	= 0x0058,
-	[TSU_VTAG1]	= 0x005c,
-	[TSU_ADSBSY]	= 0x0060,
-	[TSU_TEN]	= 0x0064,
-	[TSU_POST1]	= 0x0070,
-	[TSU_POST2]	= 0x0074,
-	[TSU_POST3]	= 0x0078,
-	[TSU_POST4]	= 0x007c,
-	[TSU_ADRH0]	= 0x0100,
-	[TSU_ADRL0]	= 0x0104,
-	[TSU_ADRH31]	= 0x01f8,
-	[TSU_ADRL31]	= 0x01fc,
-
-	[TXNLCR0]	= 0x0080,
-	[TXALCR0]	= 0x0084,
-	[RXNLCR0]	= 0x0088,
-	[RXALCR0]	= 0x008c,
-	[FWNLCR0]	= 0x0090,
-	[FWALCR0]	= 0x0094,
-	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
-	[RXNLCR1]	= 0x00a8,
-	[RXALCR1]	= 0x00ac,
-	[FWNLCR1]	= 0x00b0,
-	[FWALCR1]	= 0x00b4,
-};
-
-static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = {
-	[ECMR]	= 0x0100,
-	[RFLR]	= 0x0108,
-	[ECSR]	= 0x0110,
-	[ECSIPR]	= 0x0118,
-	[PIR]	= 0x0120,
-	[PSR]	= 0x0128,
-	[RDMLR]	= 0x0140,
-	[IPGR]	= 0x0150,
-	[APR]	= 0x0154,
-	[MPR]	= 0x0158,
-	[TPAUSER]	= 0x0164,
-	[RFCF]	= 0x0160,
-	[TPAUSECR]	= 0x0168,
-	[BCFRR]	= 0x016c,
-	[MAHR]	= 0x01c0,
-	[MALR]	= 0x01c8,
-	[TROCR]	= 0x01d0,
-	[CDCR]	= 0x01d4,
-	[LCCR]	= 0x01d8,
-	[CNDCR]	= 0x01dc,
-	[CEFCR]	= 0x01e4,
-	[FRECR]	= 0x01e8,
-	[TSFRCR]	= 0x01ec,
-	[TLFRCR]	= 0x01f0,
-	[RFCR]	= 0x01f4,
-	[MAFCR]	= 0x01f8,
-	[RTRATE]	= 0x01fc,
-
-	[EDMR]	= 0x0000,
-	[EDTRR]	= 0x0008,
-	[EDRRR]	= 0x0010,
-	[TDLAR]	= 0x0018,
-	[RDLAR]	= 0x0020,
-	[EESR]	= 0x0028,
-	[EESIPR]	= 0x0030,
-	[TRSCER]	= 0x0038,
-	[RMFCR]	= 0x0040,
-	[TFTR]	= 0x0048,
-	[FDR]	= 0x0050,
-	[RMCR]	= 0x0058,
-	[TFUCR]	= 0x0064,
-	[RFOCR]	= 0x0068,
-	[FCFTR]	= 0x0070,
-	[RPADIR]	= 0x0078,
-	[TRIMD]	= 0x007c,
-	[RBWAR]	= 0x00c8,
-	[RDFAR]	= 0x00cc,
-	[TBRAR]	= 0x00d4,
-	[TDFAR]	= 0x00d8,
-};
-
-static const u16 sh_eth_offset_fast_sh3_sh2[SH_ETH_MAX_REGISTER_OFFSET] = {
-	[ECMR]	= 0x0160,
-	[ECSR]	= 0x0164,
-	[ECSIPR]	= 0x0168,
-	[PIR]	= 0x016c,
-	[MAHR]	= 0x0170,
-	[MALR]	= 0x0174,
-	[RFLR]	= 0x0178,
-	[PSR]	= 0x017c,
-	[TROCR]	= 0x0180,
-	[CDCR]	= 0x0184,
-	[LCCR]	= 0x0188,
-	[CNDCR]	= 0x018c,
-	[CEFCR]	= 0x0194,
-	[FRECR]	= 0x0198,
-	[TSFRCR]	= 0x019c,
-	[TLFRCR]	= 0x01a0,
-	[RFCR]	= 0x01a4,
-	[MAFCR]	= 0x01a8,
-	[IPGR]	= 0x01b4,
-	[APR]	= 0x01b8,
-	[MPR]	= 0x01bc,
-	[TPAUSER]	= 0x01c4,
-	[BCFR]	= 0x01cc,
-
-	[ARSTR]	= 0x0000,
-	[TSU_CTRST]	= 0x0004,
-	[TSU_FWEN0]	= 0x0010,
-	[TSU_FWEN1]	= 0x0014,
-	[TSU_FCM]	= 0x0018,
-	[TSU_BSYSL0]	= 0x0020,
-	[TSU_BSYSL1]	= 0x0024,
-	[TSU_PRISL0]	= 0x0028,
-	[TSU_PRISL1]	= 0x002c,
-	[TSU_FWSL0]	= 0x0030,
-	[TSU_FWSL1]	= 0x0034,
-	[TSU_FWSLC]	= 0x0038,
-	[TSU_QTAGM0]	= 0x0040,
-	[TSU_QTAGM1]	= 0x0044,
-	[TSU_ADQT0]	= 0x0048,
-	[TSU_ADQT1]	= 0x004c,
-	[TSU_FWSR]	= 0x0050,
-	[TSU_FWINMK]	= 0x0054,
-	[TSU_ADSBSY]	= 0x0060,
-	[TSU_TEN]	= 0x0064,
-	[TSU_POST1]	= 0x0070,
-	[TSU_POST2]	= 0x0074,
-	[TSU_POST3]	= 0x0078,
-	[TSU_POST4]	= 0x007c,
-
-	[TXNLCR0]	= 0x0080,
-	[TXALCR0]	= 0x0084,
-	[RXNLCR0]	= 0x0088,
-	[RXALCR0]	= 0x008c,
-	[FWNLCR0]	= 0x0090,
-	[FWALCR0]	= 0x0094,
-	[TXNLCR1]	= 0x00a0,
-	[TXALCR1]	= 0x00a0,
-	[RXNLCR1]	= 0x00a8,
-	[RXALCR1]	= 0x00ac,
-	[FWNLCR1]	= 0x00b0,
-	[FWALCR1]	= 0x00b4,
-
-	[TSU_ADRH0]	= 0x0100,
-	[TSU_ADRL0]	= 0x0104,
-	[TSU_ADRL31]	= 0x01fc,
-
-};
-
 /* Driver's parameters */
 #if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE)
 #define SH4_SKB_RX_ALIGN	32

^ permalink raw reply

* [net-next PATCH] [RFC] [v2] net: add option to enable error queue packets waking select
From: Jacob Keller @ 2013-03-28 21:19 UTC (permalink / raw)
  To: netdev; +Cc: Jeffrey Kirsher, Richard Cochran, Matthew Vick

Currently, when a socket receives something on the error queue it only wakes up
the socket on select if it is in the "read" list, that is the socket has
something to read. It is useful also to wake the socket if it is in the error
list, which would enable software to wait on error queue packets without waking
up for regular data on the socket. The main use case is for receiving
timestamped transmit packets which return the timestamp to the socket via the
error queue. This enables an application to select on the socket for the error
queue only instead of for the regular traffic.

-v2-
* Added the SO_SELECT_ERR_QUEUE socket option to every architecture-specific file
* Modified every socket poll function that checks error queue

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Cc: Jeffrey Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Richard Cochran <richardcochran@gmail.com>
Cc: Matthew Vick <matthew.vick@intel.com>
---
 arch/alpha/include/uapi/asm/socket.h   |    2 ++
 arch/avr32/include/uapi/asm/socket.h   |    2 ++
 arch/cris/include/uapi/asm/socket.h    |    2 ++
 arch/frv/include/uapi/asm/socket.h     |    2 ++
 arch/h8300/include/uapi/asm/socket.h   |    2 ++
 arch/ia64/include/uapi/asm/socket.h    |    2 ++
 arch/m32r/include/uapi/asm/socket.h    |    2 ++
 arch/mips/include/uapi/asm/socket.h    |    2 ++
 arch/mn10300/include/uapi/asm/socket.h |    2 ++
 arch/parisc/include/uapi/asm/socket.h  |    2 ++
 arch/powerpc/include/uapi/asm/socket.h |    2 ++
 arch/s390/include/uapi/asm/socket.h    |    2 ++
 arch/sparc/include/uapi/asm/socket.h   |    2 ++
 arch/xtensa/include/uapi/asm/socket.h  |    2 ++
 include/net/sock.h                     |    1 +
 include/uapi/asm-generic/socket.h      |    2 ++
 net/bluetooth/af_bluetooth.c           |    3 ++-
 net/core/datagram.c                    |    4 +++-
 net/core/sock.c                        |    8 ++++++++
 net/iucv/af_iucv.c                     |    3 ++-
 net/nfc/llcp/sock.c                    |    3 ++-
 net/sctp/socket.c                      |    3 ++-
 net/unix/af_unix.c                     |    4 +++-
 23 files changed, 53 insertions(+), 6 deletions(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index c519552..eee6ea7 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -79,4 +79,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 51c6401..37401f5 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -72,4 +72,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/uapi/asm/socket.h b/arch/cris/include/uapi/asm/socket.h
index 50692b7..ba409c9 100644
--- a/arch/cris/include/uapi/asm/socket.h
+++ b/arch/cris/include/uapi/asm/socket.h
@@ -74,6 +74,8 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index 595391f..31dbb5d 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -72,5 +72,7 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/uapi/asm/socket.h b/arch/h8300/include/uapi/asm/socket.h
index 43e3262..5d1c6d0 100644
--- a/arch/h8300/include/uapi/asm/socket.h
+++ b/arch/h8300/include/uapi/asm/socket.h
@@ -72,4 +72,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index c567adc..6b4329f 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -81,4 +81,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 519afa2..2a3b59e 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -72,4 +72,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 47132f4..3b21150 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 5c7c7c9..b4ce844 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -72,4 +72,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 526e4b9..70c512a 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -71,6 +71,8 @@
 
 #define SO_LOCK_FILTER		0x4025
 
+#define SO_SELECT_ERR_QUEUE	0x4026
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index a26dcae..a36daf3 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -79,4 +79,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index f99eea7..2dacb306 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -78,4 +78,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index cbbad74..89f49b6 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -68,6 +68,8 @@
 
 #define SO_LOCK_FILTER		0x0028
 
+#define SO_SELECT_ERR_QUEUE	0x0029
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 35905cb..a8f44f5 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -83,4 +83,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 14f6e9d..08f05f9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -667,6 +667,7 @@ enum sock_flags {
 		     * user-space instead.
 		     */
 	SOCK_FILTER_LOCKED, /* Filter cannot be changed anymore */
+	SOCK_SELECT_ERR_QUEUE, /* Wake select on error queue */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 4ef3acb..c5d2e3a 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -74,4 +74,6 @@
 
 #define SO_LOCK_FILTER		44
 
+#define SO_SELECT_ERR_QUEUE	45
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index d3ee69b..409902f 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -422,7 +422,8 @@ unsigned int bt_sock_poll(struct file *file, struct socket *sock,
 		return bt_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 368f9c3..36da5b6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -749,7 +749,9 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/core/sock.c b/net/core/sock.c
index a19e728..2ff5f36 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -907,6 +907,10 @@ set_rcvbuf:
 		sock_valbool_flag(sk, SOCK_NOFCS, valbool);
 		break;
 
+	case SO_SELECT_ERR_QUEUE:
+		sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1160,6 +1164,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
 		break;
 
+	case SO_SELECT_ERR_QUEUE:
+		v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index a7d11ffe..f0550a3 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1461,7 +1461,8 @@ unsigned int iucv_sock_poll(struct file *file, struct socket *sock,
 		return iucv_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
 
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP;
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
index f1b377e..2d55e8a 100644
--- a/net/nfc/llcp/sock.c
+++ b/net/nfc/llcp/sock.c
@@ -521,7 +521,8 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
 		return llcp_accept_poll(sk);
 
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b907073..dd21ae3 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6185,7 +6185,8 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
 
 	/* Is there any exceptional events?  */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 51be64f..2d2ccf8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2197,7 +2197,9 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
 
 	/* exceptional events? */
 	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
-		mask |= POLLERR;
+		mask |= POLLERR |
+			sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+
 	if (sk->sk_shutdown & RCV_SHUTDOWN)
 		mask |= POLLRDHUP | POLLIN | POLLRDNORM;
 	if (sk->sk_shutdown == SHUTDOWN_MASK)

^ permalink raw reply related

* [PATCH net-next] audit: pass int* to nlmsg_next
From: Alexandru Copot @ 2013-03-28 21:31 UTC (permalink / raw)
  To: linux-kernel, netdev, davem; +Cc: honkiko, tgraf, Alexandru Copot

Commit 941912133025926307c7a65b203fa38403b1063a replaced the macros
NLMSG_NEXT with calls to nlmsg_next which produces this warning:

kernel/audit.c: In function ‘audit_receive_skb’:
kernel/audit.c:928:3: warning: passing argument 2 of ‘nlmsg_next’ makes pointer from integer without a cast
In file included from include/net/rtnetlink.h:5:0,
                 from include/net/neighbour.h:28,
                 from include/net/dst.h:17,
                 from include/net/sock.h:68,
                 from kernel/audit.c:55:
include/net/netlink.h:359:1: note: expected ‘int *’ but argument is of type ‘int’

Fix this by sending the intended pointer.

Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>
---
 kernel/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/audit.c b/kernel/audit.c
index 4dbb047..488f85f 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -925,7 +925,7 @@ static void audit_receive_skb(struct sk_buff *skb)
 		if (err || (nlh->nlmsg_flags & NLM_F_ACK))
 			netlink_ack(skb, nlh, err);
 
-		nlh = nlmsg_next(nlh, len);
+		nlh = nlmsg_next(nlh, &len);
 	}
 }
 
-- 
1.8.2

^ permalink raw reply related

* [PATCH 1/2] net: calxedaxgmac: fix rx ring handling when OOM
From: Rob Herring @ 2013-03-28 21:32 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: David S. Miller, Rob Herring

From: Rob Herring <rob.herring@calxeda.com>

If skb allocation for the rx ring fails repeatedly, we can reach a point
where the ring is empty. In this condition, the driver is out of sync with
the h/w. While this has always been possible, the removal of the skb
recycling seems to have made triggering this problem easier.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/net/ethernet/calxeda/xgmac.c |    3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index a170065..bb1e80b 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -1141,6 +1141,9 @@ static int xgmac_rx(struct xgmac_priv *priv, int limit)
 		struct sk_buff *skb;
 		int frame_len;
 
+		if (!dma_ring_cnt(priv->rx_head, priv->rx_tail, DMA_RX_RING_SZ))
+			break;
+
 		entry = priv->rx_tail;
 		p = priv->dma_rx + entry;
 		if (desc_get_owner(p))
-- 
1.7.10.4

^ permalink raw reply related

* [PATCH 2/2] net: calxedaxgmac: Wake-on-LAN fixes
From: Rob Herring @ 2013-03-28 21:32 UTC (permalink / raw)
  To: linux-kernel, netdev; +Cc: David S. Miller, Rob Herring
In-Reply-To: <1364506365-13689-1-git-send-email-robherring2@gmail.com>

From: Rob Herring <rob.herring@calxeda.com>

WOL is broken because the magic packet status bit is getting set rather
than the enable bit. The PMT interrupt is not getting serviced because
the PMT interrupt is also enabled on the global interrupt, but not
cleared by the global interrupt and the global interrupt is higher
priority. This fixes both of these issues to get WOL working.

There's still a problem with receive after resume, but at least now we
can wake-up.

Signed-off-by: Rob Herring <rob.herring@calxeda.com>
---
 drivers/net/ethernet/calxeda/xgmac.c |    6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index bb1e80b..b0ebc9f 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -163,6 +163,7 @@
 #define XGMAC_FLOW_CTRL_FCB_BPA	0x00000001	/* Flow Control Busy ... */
 
 /* XGMAC_INT_STAT reg */
+#define XGMAC_INT_STAT_PMTIM	0x00800000	/* PMT Interrupt Mask */
 #define XGMAC_INT_STAT_PMT	0x0080		/* PMT Interrupt Status */
 #define XGMAC_INT_STAT_LPI	0x0040		/* LPI Interrupt Status */
 
@@ -960,6 +961,9 @@ static int xgmac_hw_init(struct net_device *dev)
 	writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_STATUS);
 	writel(DMA_INTR_DEFAULT_MASK, ioaddr + XGMAC_DMA_INTR_ENA);
 
+	/* Mask power mgt interrupt */
+	writel(XGMAC_INT_STAT_PMTIM, ioaddr + XGMAC_INT_STAT);
+
 	/* XGMAC requires AXI bus init. This is a 'magic number' for now */
 	writel(0x0077000E, ioaddr + XGMAC_DMA_AXI_BUS);
 
@@ -1828,7 +1832,7 @@ static void xgmac_pmt(void __iomem *ioaddr, unsigned long mode)
 	unsigned int pmt = 0;
 
 	if (mode & WAKE_MAGIC)
-		pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_MAGIC_PKT;
+		pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_MAGIC_PKT_EN;
 	if (mode & WAKE_UCAST)
 		pmt |= XGMAC_PMT_POWERDOWN | XGMAC_PMT_GLBL_UNICAST;
 
-- 
1.7.10.4

^ permalink raw reply related

* Re: [PATCH 1/2] sky2: Receive Overflows not counted
From: Stephen Hemminger @ 2013-03-28 21:40 UTC (permalink / raw)
  To: Mirko Lindner, David Miller; +Cc: netdev
In-Reply-To: <1902752B0C92F943AB7EA9EE13E2DEEC7F1545@HQ1-EXCH02.corp.brocade.com>

> The sky2 driver doesn't count the Receive Overflows because the MAC 
> interrupt for this event is not set in the MAC's interrupt mask.
> The MAC's interrupt mask is set only for Transmit FIFO Underruns.
> 
> Fix: The correct setting should be (GM_IS_TX_FF_UR | GM_IS_RX_FF_OR)
> Otherwise the Receive Overflow event will not generate any interrupt.
> The  Receive Overflow interrupt is handled correctly
> 
> Signed-off-by: Mirko Lindner <mlindner@marvell.com>

Both these patches are fine, but the patch format seems corrupted since
they won't directly apply with quilt.

Acked-by: Stephen Hemminger <stephen@networkplumber.org>

^ permalink raw reply

* Re: [PATCH net-next] audit: pass int* to nlmsg_next
From: David Miller @ 2013-03-28 21:40 UTC (permalink / raw)
  To: alex.mihai.c; +Cc: linux-kernel, netdev, honkiko, tgraf
In-Reply-To: <1364506289-9146-1-git-send-email-alex.mihai.c@gmail.com>

From: Alexandru Copot <alex.mihai.c@gmail.com>
Date: Thu, 28 Mar 2013 23:31:29 +0200

> Commit 941912133025926307c7a65b203fa38403b1063a replaced the macros
> NLMSG_NEXT with calls to nlmsg_next which produces this warning:
> 
> kernel/audit.c: In function ‘audit_receive_skb’:
> kernel/audit.c:928:3: warning: passing argument 2 of ‘nlmsg_next’ makes pointer from integer without a cast
> In file included from include/net/rtnetlink.h:5:0,
>                  from include/net/neighbour.h:28,
>                  from include/net/dst.h:17,
>                  from include/net/sock.h:68,
>                  from kernel/audit.c:55:
> include/net/netlink.h:359:1: note: expected ‘int *’ but argument is of type ‘int’
> 
> Fix this by sending the intended pointer.
> 
> Signed-off-by: Alexandru Copot <alex.mihai.c@gmail.com>

Applied, thanks.

^ permalink raw reply

* Re: pull request: wireless 2013-03-28
From: David Miller @ 2013-03-28 21:42 UTC (permalink / raw)
  To: linville; +Cc: linux-wireless, netdev
In-Reply-To: <20130328191058.GA27701@tuxdriver.com>

From: "John W. Linville" <linville@tuxdriver.com>
Date: Thu, 28 Mar 2013 15:10:58 -0400

> Please excuse this larger-than-I-would-like pull request intended
> for the 3.9 stream.  There are a number of late-breaking fixes,
> including a revert...

Pulled, thanks John.

^ permalink raw reply

* Re: [PATCH iproute2 7/7] ip/xfrm: Improve error strings
From: Stephen Hemminger @ 2013-03-28 21:47 UTC (permalink / raw)
  To: David Ward; +Cc: netdev
In-Reply-To: <1364221399-1024-7-git-send-email-david.ward@ll.mit.edu>

On Mon, 25 Mar 2013 10:23:19 -0400
David Ward <david.ward@ll.mit.edu> wrote:

> Quotation marks are now used only to indicate literal text on the
> command line.
> 
> Signed-off-by: David Ward <david.ward@ll.mit.edu>

All applied, although the last one needs a little whitespace fix-up.

/usr/src/iproute2/.git/rebase-apply/patch:16: space before tab in indent.
 	fprintf(stderr, "MODE := transport | tunnel | beet | ro | in_trigger\n");
/usr/src/iproute2/.git/rebase-apply/patch:39: space before tab in indent.
 	fprintf(stderr, "MODE := transport | tunnel | beet | ro | in_trigger\n");
warning: 2 lines add whitespace errors.

^ permalink raw reply

* [PATCH 2/2] sh_eth: add R-Car support for real
From: Sergei Shtylyov @ 2013-03-28 21:51 UTC (permalink / raw)
  To: netdev; +Cc: nobuhiro.iwamatsu.yj, linux-sh, phil.edworthy

Commit d0418bb7123f44b23d69ac349eec7daf9103472f (net: sh_eth: Add eth support
for R8A7779 device) was a failed attempt to add support for one of the members of
the R-Car SoC family.  That's for three reasons: it treated R8A7779 the same
as SH7724 except for including a quite dirty hack adding the ECMR_ELB bit to the mask
in sh_eth_set_rate() while not removing the ECMR_RTM bit (despite it being reserved in
R-Car Ether), and it didn't add a new register offset array despite the closest
SH_ETH_REG_FAST_SH4 mapping differs by 0x200 to the offsets all the R-Car Ether
registers have, and also some of the registers in this old mapping don't exist
on R-Car Ether (due to this, SH7724's 'sh_eth_my_cpu_data' structure is not
adequate for R-Car too).  Fix all these shortcomings, restoring the SH7724
related section to its pristine state...

Signed-off-by: Sergei Shtylyov <sergei.shtylyov@cogentembedded.com>

---
The patch is against David Miller's 'net-next.git' repo.

Support for the other members of R-Car family such as R8A7778 and R8A7790 should
probably be added when they hit mainline (support for the former is already in
the 'next' branch of Simon Horman's 'renesas.git' repo).

 drivers/net/ethernet/renesas/sh_eth.c |  107 +++++++++++++++++++++++++++++++---
 include/linux/sh_eth.h                |    1 
 2 files changed, 100 insertions(+), 8 deletions(-)

Index: net-next/drivers/net/ethernet/renesas/sh_eth.c
===================================================================
--- net-next.orig/drivers/net/ethernet/renesas/sh_eth.c
+++ net-next/drivers/net/ethernet/renesas/sh_eth.c
@@ -2,7 +2,8 @@
  *  SuperH Ethernet device driver
  *
  *  Copyright (C) 2006-2012 Nobuhiro Iwamatsu
- *  Copyright (C) 2008-2012 Renesas Solutions Corp.
+ *  Copyright (C) 2008-2013 Renesas Solutions Corp.
+ *  Copyright (C) 2013 Cogent Embedded, Inc.
  *
  *  This program is free software; you can redistribute it and/or modify it
  *  under the terms and conditions of the GNU General Public License,
@@ -147,6 +148,51 @@ static const u16 sh_eth_offset_gigabit[S
 	[FWALCR1]	= 0x00b4,
 };
 
+static const u16 sh_eth_offset_fast_rcar[SH_ETH_MAX_REGISTER_OFFSET] = {
+	[ECMR]		= 0x0300,
+	[RFLR]		= 0x0308,
+	[ECSR]		= 0x0310,
+	[ECSIPR]	= 0x0318,
+	[PIR]		= 0x0320,
+	[PSR]		= 0x0328,
+	[RDMLR]		= 0x0340,
+	[IPGR]		= 0x0350,
+	[APR]		= 0x0354,
+	[MPR]		= 0x0358,
+	[RFCF]		= 0x0360,
+	[TPAUSER]	= 0x0364,
+	[TPAUSECR]	= 0x0368,
+	[MAHR]		= 0x03c0,
+	[MALR]		= 0x03c8,
+	[TROCR]		= 0x03d0,
+	[CDCR]		= 0x03d4,
+	[LCCR]		= 0x03d8,
+	[CNDCR]		= 0x03dc,
+	[CEFCR]		= 0x03e4,
+	[FRECR]		= 0x03e8,
+	[TSFRCR]	= 0x03ec,
+	[TLFRCR]	= 0x03f0,
+	[RFCR]		= 0x03f4,
+	[MAFCR]		= 0x03f8,
+
+	[EDMR]		= 0x0200,
+	[EDTRR]		= 0x0208,
+	[EDRRR]		= 0x0210,
+	[TDLAR]		= 0x0218,
+	[RDLAR]		= 0x0220,
+	[EESR]		= 0x0228,
+	[EESIPR]	= 0x0230,
+	[TRSCER]	= 0x0238,
+	[RMFCR]		= 0x0240,
+	[TFTR]		= 0x0248,
+	[FDR]		= 0x0250,
+	[RMCR]		= 0x0258,
+	[TFUCR]		= 0x0264,
+	[RFOCR]		= 0x0268,
+	[FCFTR]		= 0x0270,
+	[TRIMD]		= 0x027c,
+};
+
 static const u16 sh_eth_offset_fast_sh4[SH_ETH_MAX_REGISTER_OFFSET] = {
 	[ECMR]		= 0x0100,
 	[RFLR]		= 0x0108,
@@ -296,7 +342,7 @@ static void sh_eth_select_mii(struct net
 #endif
 
 /* There is CPU dependent code */
-#if defined(CONFIG_CPU_SUBTYPE_SH7724) || defined(CONFIG_ARCH_R8A7779)
+#if defined(CONFIG_ARCH_R8A7779)
 #define SH_ETH_RESET_DEFAULT	1
 static void sh_eth_set_duplex(struct net_device *ndev)
 {
@@ -311,18 +357,60 @@ static void sh_eth_set_duplex(struct net
 static void sh_eth_set_rate(struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
-	unsigned int bits = ECMR_RTM;
 
-#if defined(CONFIG_ARCH_R8A7779)
-	bits |= ECMR_ELB;
-#endif
+	switch (mdp->speed) {
+	case 10: /* 10BASE */
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_ELB, ECMR);
+		break;
+	case 100:/* 100BASE */
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_ELB, ECMR);
+		break;
+	default:
+		break;
+	}
+}
+
+/* R8A7779 */
+static struct sh_eth_cpu_data sh_eth_my_cpu_data = {
+	.set_duplex	= sh_eth_set_duplex,
+	.set_rate	= sh_eth_set_rate,
+
+	.ecsr_value	= ECSR_PSRTO | ECSR_LCHNG | ECSR_ICD,
+	.ecsipr_value	= ECSIPR_PSRTOIP | ECSIPR_LCHNGIP | ECSIPR_ICDIP,
+	.eesipr_value	= 0x01ff009f,
+
+	.tx_check	= EESR_FTC | EESR_CND | EESR_DLC | EESR_CD | EESR_RTO,
+	.eesr_err_check	= EESR_TWB | EESR_TABT | EESR_RABT | EESR_RDE |
+			  EESR_RFRMER | EESR_TFE | EESR_TDE | EESR_ECI,
+	.tx_error_check	= EESR_TWB | EESR_TABT | EESR_TDE | EESR_TFE,
+
+	.apr		= 1,
+	.mpr		= 1,
+	.tpauser	= 1,
+	.hw_swap	= 1,
+};
+#elif defined(CONFIG_CPU_SUBTYPE_SH7724)
+#define SH_ETH_RESET_DEFAULT	1
+static void sh_eth_set_duplex(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
+
+	if (mdp->duplex) /* Full */
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_DM, ECMR);
+	else		/* Half */
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_DM, ECMR);
+}
+
+static void sh_eth_set_rate(struct net_device *ndev)
+{
+	struct sh_eth_private *mdp = netdev_priv(ndev);
 
 	switch (mdp->speed) {
 	case 10: /* 10BASE */
-		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~bits, ECMR);
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) & ~ECMR_RTM, ECMR);
 		break;
 	case 100:/* 100BASE */
-		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | bits, ECMR);
+		sh_eth_write(ndev, sh_eth_read(ndev, ECMR) | ECMR_RTM, ECMR);
 		break;
 	default:
 		break;
@@ -2521,6 +2609,9 @@ static const u16 *sh_eth_get_register_of
 	case SH_ETH_REG_GIGABIT:
 		reg_offset = sh_eth_offset_gigabit;
 		break;
+	case SH_ETH_REG_FAST_RCAR:
+		reg_offset = sh_eth_offset_fast_rcar;
+		break;
 	case SH_ETH_REG_FAST_SH4:
 		reg_offset = sh_eth_offset_fast_sh4;
 		break;
Index: net-next/include/linux/sh_eth.h
===================================================================
--- net-next.orig/include/linux/sh_eth.h
+++ net-next/include/linux/sh_eth.h
@@ -6,6 +6,7 @@
 enum {EDMAC_LITTLE_ENDIAN, EDMAC_BIG_ENDIAN};
 enum {
 	SH_ETH_REG_GIGABIT,
+	SH_ETH_REG_FAST_RCAR,
 	SH_ETH_REG_FAST_SH4,
 	SH_ETH_REG_FAST_SH3_SH2
 };

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Stephen Hemminger @ 2013-03-28 22:20 UTC (permalink / raw)
  To: Benoit Lourdelet; +Cc: Serge Hallyn, Eric W. Biederman, netdev@vger.kernel.org
In-Reply-To: <CD7A1D9A.78BA%blourdel@juniper.net>

[-- Attachment #1: Type: text/plain, Size: 128 bytes --]

Try the following two patches. It adds a name hash list, and uses Eric's idea
to avoid loading map on add/delete operations.




[-- Attachment #2: 0001-ll_map-add-name-and-index-hash.patch --]
[-- Type: text/x-patch, Size: 8143 bytes --]

>From 0025e5d63d5d1598ab622867834a3bcb9f518f9f Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 28 Mar 2013 14:57:28 -0700
Subject: [PATCH 1/2] ll_map: add name and index hash

Make ll_ functions faster by having a name hash, and allow
for deletion. Also, allow them to work without calling ll_init_map.
---
 include/hlist.h  |   56 ++++++++++++++++++++
 include/ll_map.h |    3 +-
 lib/ll_map.c     |  155 ++++++++++++++++++++++++++++++++++--------------------
 3 files changed, 157 insertions(+), 57 deletions(-)
 create mode 100644 include/hlist.h

diff --git a/include/hlist.h b/include/hlist.h
new file mode 100644
index 0000000..4e8de9e
--- /dev/null
+++ b/include/hlist.h
@@ -0,0 +1,56 @@
+#ifndef __HLIST_H__
+#define __HLIST_H__ 1
+/* Hash list stuff from kernel */
+
+#include <stddef.h>
+
+#define container_of(ptr, type, member) ({			\
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);	\
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+struct hlist_head {
+	struct hlist_node *first;
+};
+
+struct hlist_node {
+	struct hlist_node *next, **pprev;
+};
+
+static inline void hlist_del(struct hlist_node *n)
+{
+	struct hlist_node *next = n->next;
+	struct hlist_node **pprev = n->pprev;
+	*pprev = next;
+	if (next)
+		next->pprev = pprev;
+}
+
+static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
+{
+	struct hlist_node *first = h->first;
+	n->next = first;
+	if (first)
+		first->pprev = &n->next;
+	h->first = n;
+	n->pprev = &h->first;
+}
+
+#define hlist_for_each(pos, head) \
+	for (pos = (head)->first; pos ; pos = pos->next)
+
+
+#define hlist_for_each_safe(pos, n, head) \
+	for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \
+	     pos = n)
+
+#define hlist_entry_safe(ptr, type, member) \
+	({ typeof(ptr) ____ptr = (ptr); \
+	   ____ptr ? hlist_entry(____ptr, type, member) : NULL; \
+	})
+
+#define hlist_for_each_entry(pos, head, member)				\
+	for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\
+	     pos;							\
+	     pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
+
+#endif /* __HLIST_H__ */
diff --git a/include/ll_map.h b/include/ll_map.h
index c4d5c6d..f1dda39 100644
--- a/include/ll_map.h
+++ b/include/ll_map.h
@@ -3,7 +3,8 @@
 
 extern int ll_remember_index(const struct sockaddr_nl *who,
 			     struct nlmsghdr *n, void *arg);
-extern int ll_init_map(struct rtnl_handle *rth);
+
+extern void ll_init_map(struct rtnl_handle *rth);
 extern unsigned ll_name_to_index(const char *name);
 extern const char *ll_index_to_name(unsigned idx);
 extern const char *ll_idx_n2a(unsigned idx, char *buf);
diff --git a/lib/ll_map.c b/lib/ll_map.c
index e9ae129..fd7db55 100644
--- a/lib/ll_map.c
+++ b/lib/ll_map.c
@@ -22,10 +22,11 @@
 
 #include "libnetlink.h"
 #include "ll_map.h"
+#include "hlist.h"
 
-struct ll_cache
-{
-	struct ll_cache   *idx_next;
+struct ll_cache {
+	struct hlist_node idx_hash;
+	struct hlist_node name_hash;
 	unsigned	flags;
 	int		index;
 	unsigned short	type;
@@ -33,49 +34,107 @@ struct ll_cache
 };
 
 #define IDXMAP_SIZE	1024
-static struct ll_cache *idx_head[IDXMAP_SIZE];
+static struct hlist_head idx_head[IDXMAP_SIZE];
+static struct hlist_head name_head[IDXMAP_SIZE];
 
-static inline struct ll_cache *idxhead(int idx)
+static struct ll_cache *ll_get_by_index(unsigned index)
 {
-	return idx_head[idx & (IDXMAP_SIZE - 1)];
+	struct hlist_node *n;
+	unsigned h = index & (IDXMAP_SIZE - 1);
+
+	hlist_for_each(n, &idx_head[h]) {
+		struct ll_cache *im
+			= container_of(n, struct ll_cache, idx_hash);
+		if (im->index == index)
+			return im;
+	}
+
+	return NULL;
+}
+
+static unsigned namehash(const char *str)
+{
+	unsigned hash = 5381;
+
+	while (*str)
+		hash = ((hash << 5) + hash) + *str++; /* hash * 33 + c */
+
+	return hash;
+}
+
+static struct ll_cache *ll_get_by_name(const char *name)
+{
+	struct hlist_node *n;
+	unsigned h = namehash(name) & (IDXMAP_SIZE - 1);
+
+	hlist_for_each(n, &name_head[h]) {
+		struct ll_cache *im
+			= container_of(n, struct ll_cache, name_hash);
+
+		if (strncmp(im->name, name, IFNAMSIZ) == 0)
+			return im;
+	}
+
+	return NULL;
 }
 
 int ll_remember_index(const struct sockaddr_nl *who,
 		      struct nlmsghdr *n, void *arg)
 {
-	int h;
+	unsigned int h;
+	const char *ifname;
 	struct ifinfomsg *ifi = NLMSG_DATA(n);
-	struct ll_cache *im, **imp;
+	struct ll_cache *im;
 	struct rtattr *tb[IFLA_MAX+1];
 
-	if (n->nlmsg_type != RTM_NEWLINK)
+	if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK)
 		return 0;
 
 	if (n->nlmsg_len < NLMSG_LENGTH(sizeof(ifi)))
 		return -1;
 
+	im = ll_get_by_index(ifi->ifi_index);
+	if (n->nlmsg_type == RTM_DELLINK) {
+		if (im) {
+			hlist_del(&im->name_hash);
+			hlist_del(&im->idx_hash);
+			free(im);
+		}
+		return 0;
+	}
+
 	memset(tb, 0, sizeof(tb));
 	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(n));
-	if (tb[IFLA_IFNAME] == NULL)
+	ifname = rta_getattr_str(tb[IFLA_IFNAME]);
+	if (ifname == NULL)
 		return 0;
 
-	h = ifi->ifi_index & (IDXMAP_SIZE - 1);
-	for (imp = &idx_head[h]; (im=*imp)!=NULL; imp = &im->idx_next)
-		if (im->index == ifi->ifi_index)
-			break;
-
-	if (im == NULL) {
-		im = malloc(sizeof(*im));
-		if (im == NULL)
-			return 0;
-		im->idx_next = *imp;
-		im->index = ifi->ifi_index;
-		*imp = im;
+	if (im) {
+		/* change to existing entry */
+		if (strcmp(im->name, ifname) != 0) {
+			hlist_del(&im->name_hash);
+			h = namehash(ifname) & (IDXMAP_SIZE - 1);
+			hlist_add_head(&im->name_hash, &name_head[h]);
+		}
+
+		im->flags = ifi->ifi_flags;
+		return 0;
 	}
 
+	im = malloc(sizeof(*im));
+	if (im == NULL)
+		return 0;
+	im->index = ifi->ifi_index;
+	strcpy(im->name, ifname);
 	im->type = ifi->ifi_type;
 	im->flags = ifi->ifi_flags;
-	strcpy(im->name, RTA_DATA(tb[IFLA_IFNAME]));
+
+	h = ifi->ifi_index & (IDXMAP_SIZE - 1);
+	hlist_add_head(&im->idx_hash, &idx_head[h]);
+
+	h = namehash(ifname) & (IDXMAP_SIZE - 1);
+	hlist_add_head(&im->name_hash, &name_head[h]);
+
 	return 0;
 }
 
@@ -86,15 +145,16 @@ const char *ll_idx_n2a(unsigned idx, char *buf)
 	if (idx == 0)
 		return "*";
 
-	for (im = idxhead(idx); im; im = im->idx_next)
-		if (im->index == idx)
-			return im->name;
+	im = ll_get_by_index(idx);
+	if (im)
+		return im->name;
+
+	if (if_indextoname(idx, buf) == NULL)
+		snprintf(buf, IFNAMSIZ, "if%d", idx);
 
-	snprintf(buf, IFNAMSIZ, "if%d", idx);
 	return buf;
 }
 
-
 const char *ll_index_to_name(unsigned idx)
 {
 	static char nbuf[IFNAMSIZ];
@@ -108,10 +168,9 @@ int ll_index_to_type(unsigned idx)
 
 	if (idx == 0)
 		return -1;
-	for (im = idxhead(idx); im; im = im->idx_next)
-		if (im->index == idx)
-			return im->type;
-	return -1;
+
+	im = ll_get_by_index(idx);
+	return im ? im->type : -1;
 }
 
 unsigned ll_index_to_flags(unsigned idx)
@@ -121,35 +180,21 @@ unsigned ll_index_to_flags(unsigned idx)
 	if (idx == 0)
 		return 0;
 
-	for (im = idxhead(idx); im; im = im->idx_next)
-		if (im->index == idx)
-			return im->flags;
-	return 0;
+	im = ll_get_by_index(idx);
+	return im ? im->flags : -1;
 }
 
 unsigned ll_name_to_index(const char *name)
 {
-	static char ncache[IFNAMSIZ];
-	static int icache;
-	struct ll_cache *im;
-	int i;
+	const struct ll_cache *im;
 	unsigned idx;
 
 	if (name == NULL)
 		return 0;
 
-	if (icache && strcmp(name, ncache) == 0)
-		return icache;
-
-	for (i=0; i<IDXMAP_SIZE; i++) {
-		for (im = idx_head[i]; im; im = im->idx_next) {
-			if (strcmp(im->name, name) == 0) {
-				icache = im->index;
-				strcpy(ncache, name);
-				return im->index;
-			}
-		}
-	}
+	im = ll_get_by_name(name);
+	if (im)
+		return im->index;
 
 	idx = if_nametoindex(name);
 	if (idx == 0)
@@ -157,12 +202,12 @@ unsigned ll_name_to_index(const char *name)
 	return idx;
 }
 
-int ll_init_map(struct rtnl_handle *rth)
+void ll_init_map(struct rtnl_handle *rth)
 {
 	static int initialized;
 
 	if (initialized)
-		return 0;
+		return;
 
 	if (rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK) < 0) {
 		perror("Cannot send dump request");
@@ -175,6 +220,4 @@ int ll_init_map(struct rtnl_handle *rth)
 	}
 
 	initialized = 1;
-
-	return 0;
 }
-- 
1.7.10.4


[-- Attachment #3: 0002-ip-remove-unnecessary-ll_init_map.patch --]
[-- Type: text/x-patch, Size: 2755 bytes --]

>From f0124b0f0aa0e5b9288114eb8e6ff9b4f8c33ec8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Thu, 28 Mar 2013 15:17:47 -0700
Subject: [PATCH 2/2] ip: remove unnecessary ll_init_map

Don't call ll_init_map on modify operations
Saves significant overhead with 1000's of devices.
---
 ip/ipaddress.c    |    2 --
 ip/ipaddrlabel.c  |    2 --
 ip/iplink.c       |    2 --
 ip/iproute.c      |    6 ------
 ip/xfrm_monitor.c |    2 --
 5 files changed, 14 deletions(-)

diff --git a/ip/ipaddress.c b/ip/ipaddress.c
index 149df69..5b9a438 100644
--- a/ip/ipaddress.c
+++ b/ip/ipaddress.c
@@ -1365,8 +1365,6 @@ static int ipaddr_modify(int cmd, int flags, int argc, char **argv)
 	if (!scoped && cmd != RTM_DELADDR)
 		req.ifa.ifa_scope = default_scope(&lcl);
 
-	ll_init_map(&rth);
-
 	if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) {
 		fprintf(stderr, "Cannot find device \"%s\"\n", d);
 		return -1;
diff --git a/ip/ipaddrlabel.c b/ip/ipaddrlabel.c
index eb6a48c..1789d9c 100644
--- a/ip/ipaddrlabel.c
+++ b/ip/ipaddrlabel.c
@@ -246,8 +246,6 @@ static int ipaddrlabel_flush(int argc, char **argv)
 
 int do_ipaddrlabel(int argc, char **argv)
 {
-	ll_init_map(&rth);
-
 	if (argc < 1) {
 		return ipaddrlabel_list(0, NULL);
 	} else if (matches(argv[0], "list") == 0 ||
diff --git a/ip/iplink.c b/ip/iplink.c
index 5c7b43c..dc98019 100644
--- a/ip/iplink.c
+++ b/ip/iplink.c
@@ -533,8 +533,6 @@ static int iplink_modify(int cmd, unsigned int flags, int argc, char **argv)
 		}
 	}
 
-	ll_init_map(&rth);
-
 	if (!(flags & NLM_F_CREATE)) {
 		if (!dev) {
 			fprintf(stderr, "Not enough information: \"dev\" "
diff --git a/ip/iproute.c b/ip/iproute.c
index 2c2a331..adef774 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -970,8 +970,6 @@ static int iproute_modify(int cmd, unsigned flags, int argc, char **argv)
 	if (d || nhs_ok)  {
 		int idx;
 
-		ll_init_map(&rth);
-
 		if (d) {
 			if ((idx = ll_name_to_index(d)) == 0) {
 				fprintf(stderr, "Cannot find device \"%s\"\n", d);
@@ -1265,8 +1263,6 @@ static int iproute_list_flush_or_save(int argc, char **argv, int action)
 	if (do_ipv6 == AF_UNSPEC && filter.tb)
 		do_ipv6 = AF_INET;
 
-	ll_init_map(&rth);
-
 	if (id || od)  {
 		int idx;
 
@@ -1452,8 +1448,6 @@ static int iproute_get(int argc, char **argv)
 		exit(1);
 	}
 
-	ll_init_map(&rth);
-
 	if (idev || odev)  {
 		int idx;
 
diff --git a/ip/xfrm_monitor.c b/ip/xfrm_monitor.c
index bfc48f1..a1f5d53 100644
--- a/ip/xfrm_monitor.c
+++ b/ip/xfrm_monitor.c
@@ -408,8 +408,6 @@ int do_xfrm_monitor(int argc, char **argv)
 		return rtnl_from_file(fp, xfrm_accept_msg, (void*)stdout);
 	}
 
-	//ll_init_map(&rth);
-
 	if (rtnl_open_byproto(&rth, groups, NETLINK_XFRM) < 0)
 		exit(1);
 
-- 
1.7.10.4


^ permalink raw reply related

* Re: r8169 auto speed down issue
From: Francois Romieu @ 2013-03-28 23:19 UTC (permalink / raw)
  To: hayeswang; +Cc: netdev, linux-kernel, bowgotsai, 'Ryankao'
In-Reply-To: <4EECA12DD88643FD9006350514D96C8E@realtek.com.tw>

hayeswang <hayeswang@realtek.com> :
[...]
> Do you have any suggestion about this?

Your description suggests that testing against the link partner ability
to work at 10M instead of testing for tp->link_ok could be good enough.

Does it make sense ?

-- 
Ueimor

^ permalink raw reply

* Re: [net-next PATCH 3/3] net: frag queue per hash bucket locking
From: Hannes Frederic Sowa @ 2013-03-28 23:30 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jesper Dangaard Brouer, David S. Miller, netdev, Florian Westphal,
	Daniel Borkmann
In-Reply-To: <1364502164.15753.56.camel@edumazet-glaptop>

On Thu, Mar 28, 2013 at 01:22:44PM -0700, Eric Dumazet wrote:
> On Thu, 2013-03-28 at 19:57 +0100, Hannes Frederic Sowa wrote:
> 
> > I assume that it has to do with the usage of this code in
> > ipv6/netfilter/nf_conntrack_reasm.c, which could be invoked from process
> > context, if I read it correctly.
> 
> Then there would be a possible deadlock in current code.

Netfilter currently does a local_bh_disable() before entering inet_fragment
(and later enables it, again).

^ permalink raw reply

* Re: [net-next PATCH 3/3] net: frag queue per hash bucket locking
From: Eric Dumazet @ 2013-03-28 23:39 UTC (permalink / raw)
  To: Hannes Frederic Sowa
  Cc: Jesper Dangaard Brouer, David S. Miller, netdev, Florian Westphal,
	Daniel Borkmann
In-Reply-To: <20130328233002.GC20223@order.stressinduktion.org>

On Fri, 2013-03-29 at 00:30 +0100, Hannes Frederic Sowa wrote:
> On Thu, Mar 28, 2013 at 01:22:44PM -0700, Eric Dumazet wrote:
> > On Thu, 2013-03-28 at 19:57 +0100, Hannes Frederic Sowa wrote:
> > 
> > > I assume that it has to do with the usage of this code in
> > > ipv6/netfilter/nf_conntrack_reasm.c, which could be invoked from process
> > > context, if I read it correctly.
> > 
> > Then there would be a possible deadlock in current code.
> 
> Netfilter currently does a local_bh_disable() before entering inet_fragment
> (and later enables it, again).
> 

Good, so no need for the _bh() as I suspected.

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric W. Biederman @ 2013-03-28 23:52 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Benoit Lourdelet, Serge Hallyn, netdev@vger.kernel.org
In-Reply-To: <20130328152040.2c905ad9@nehalam.linuxnetplumber.net>

Stephen Hemminger <stephen@networkplumber.org> writes:

> Try the following two patches. It adds a name hash list, and uses Eric's idea
> to avoid loading map on add/delete operations.

On my microbenchmark of just creating 5000 veth pairs this takes
16s instead of the 13s of my earlier hacks, but that is well down in the
usable range.

Deleting all of those network interfaces one by one takes me 60s.

So on the microbenchmark side this looks like a good improvement and
pretty usable.

I expect Benoit's container startup workload will also reflect this, but
it will be interesting to see the actual result.

Eric

^ permalink raw reply

* [PATCH net] net: fq_codel: Fix off-by-one error
From: Vijay Subramanian @ 2013-03-28 23:52 UTC (permalink / raw)
  To: netdev; +Cc: davem, eric.dumazet, Vijay Subramanian

Currently, we hold a max of sch->limit -1 number of packets instead of
sch->limit packets. Fix this off-by-one error.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
---
 net/sched/sch_fq_codel.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 4e606fc..5578628 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -195,7 +195,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		flow->deficit = q->quantum;
 		flow->dropped = 0;
 	}
-	if (++sch->q.qlen < sch->limit)
+	if (++sch->q.qlen <= sch->limit)
 		return NET_XMIT_SUCCESS;
 
 	q->drop_overlimit++;
-- 
1.7.9.5

^ permalink raw reply related

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric Dumazet @ 2013-03-29  0:13 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Stephen Hemminger, Benoit Lourdelet, Serge Hallyn,
	netdev@vger.kernel.org
In-Reply-To: <87zjxn84ks.fsf@xmission.com>

On Thu, 2013-03-28 at 16:52 -0700, Eric W. Biederman wrote:

> On my microbenchmark of just creating 5000 veth pairs this takes pairs
> 16s instead of 13s of my earlier hacks but that is well down in the
> usable range.

I guess most of the time is taken by sysctl_check_table()

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric W. Biederman @ 2013-03-29  0:25 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Stephen Hemminger, Benoit Lourdelet, Serge Hallyn,
	netdev@vger.kernel.org
In-Reply-To: <1364516016.15753.59.camel@edumazet-glaptop>

Eric Dumazet <eric.dumazet@gmail.com> writes:

> On Thu, 2013-03-28 at 16:52 -0700, Eric W. Biederman wrote:
>
>> On my microbenchmark of just creating 5000 veth pairs this takes pairs
>> 16s instead of 13s of my earlier hacks but that is well down in the
>> usable range.
>
> I guess most of the time is taken by sysctl_check_table()

All of the significant sysctl slowdowns were fixed in 3.4.  If you see
something of sysctl show up in a trace I would be happy to talk about
it.  On the kernel side, creating N network devices seems to
take NlogN time now.  Both sysfs and sysctl store directories as
rbtrees, removing their previous bottlenecks.

The loop I timed at 16s was just:

time for i in $(seq 1 5000) ; do ip link add a$i type veth peer name b$i; done

There is plenty of room for inefficiencies in 10000 network devices and
5000 forks+execs.

Eric

^ permalink raw reply

* Re: [net-next PATCH 3/3] net: frag queue per hash bucket locking
From: Hannes Frederic Sowa @ 2013-03-29  0:33 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Jesper Dangaard Brouer, David S. Miller, netdev, Florian Westphal,
	Daniel Borkmann
In-Reply-To: <1364513982.15753.57.camel@edumazet-glaptop>

On Thu, Mar 28, 2013 at 04:39:42PM -0700, Eric Dumazet wrote:
> On Fri, 2013-03-29 at 00:30 +0100, Hannes Frederic Sowa wrote:
> > On Thu, Mar 28, 2013 at 01:22:44PM -0700, Eric Dumazet wrote:
> > > On Thu, 2013-03-28 at 19:57 +0100, Hannes Frederic Sowa wrote:
> > > 
> > > > I assume that it has to do with the usage of this code in
> > > > ipv6/netfilter/nf_conntrack_reasm.c, which could be invoked from process
> > > > context, if I read it correctly.
> > > 
> > > Then there would be a possible deadlock in current code.
> > 
> > Netfilter currently does a local_bh_disable() before entering inet_fragment
> > (and later enables it, again).
> > 
> 
> Good, so no need for the _bh() as I suspected.

Ack.

I replaced the _bh spin_locks with plain spinlocks and tested the code
with sending fragments and receiving fragments (netfilter and reassembly
logic) with lockdep and didn't get any splats. Looks good so far.

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric Dumazet @ 2013-03-29  0:43 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Stephen Hemminger, Benoit Lourdelet, Serge Hallyn,
	netdev@vger.kernel.org
In-Reply-To: <87ppyj6ohh.fsf@xmission.com>

On Thu, 2013-03-28 at 17:25 -0700, Eric W. Biederman wrote:
> Eric Dumazet <eric.dumazet@gmail.com> writes:
> 
> > On Thu, 2013-03-28 at 16:52 -0700, Eric W. Biederman wrote:
> >
> >> On my microbenchmark of just creating 5000 veth pairs this takes pairs
> >> 16s instead of 13s of my earlier hacks but that is well down in the
> >> usable range.
> >
> > I guess most of the time is taken by sysctl_check_table()
> 
> All of the significant sysctl slowdowns were fixed in 3.4.  If you see
> something of sysctl show up in a trace I would be happy to talk about
> it.  The kernel side seems to be creating N network devices seems to
> take NlogN time now.  Both sysfs and sysctl store directories as
> rbtrees removing their previous bottlenecks.
> 
> The loop I timed at 16s was just:
> 
> time for i in $(seq 1 5000) ; do ip link add a$i type veth peer name b$i; done
> 
> There is plenty of room for inefficiencies in 10000 network devices and
> 5000 forks+execs.

Ah right, the sysctl part is fixed ;)

In batch mode, I can create these veth pairs in 4 seconds

for i in $(seq 1 5000) ; do echo link add a$i type veth peer name b$i;
done | ip -batch -

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric W. Biederman @ 2013-03-29  1:06 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Stephen Hemminger, Benoit Lourdelet, Serge Hallyn,
	netdev@vger.kernel.org
In-Reply-To: <1364517837.15753.61.camel@edumazet-glaptop>

Eric Dumazet <eric.dumazet@gmail.com> writes:

> On Thu, 2013-03-28 at 17:25 -0700, Eric W. Biederman wrote:
>> Eric Dumazet <eric.dumazet@gmail.com> writes:
>> 
>> > On Thu, 2013-03-28 at 16:52 -0700, Eric W. Biederman wrote:
>> >
>> >> On my microbenchmark of just creating 5000 veth pairs this takes pairs
>> >> 16s instead of 13s of my earlier hacks but that is well down in the
>> >> usable range.
>> >
>> > I guess most of the time is taken by sysctl_check_table()
>> 
>> All of the significant sysctl slowdowns were fixed in 3.4.  If you see
>> something of sysctl show up in a trace I would be happy to talk about
>> it.  The kernel side seems to be creating N network devices seems to
>> take NlogN time now.  Both sysfs and sysctl store directories as
>> rbtrees removing their previous bottlenecks.
>> 
>> The loop I timed at 16s was just:
>> 
>> time for i in $(seq 1 5000) ; do ip link add a$i type veth peer name b$i; done
>> 
>> There is plenty of room for inefficiencies in 10000 network devices and
>> 5000 forks+execs.
>
> Ah right, the sysctl part is fixed ;)
>
> In batch mode, I can create these veth pairs in 4 seconds
>
> for i in $(seq 1 5000) ; do echo link add a$i type veth peer name b$i;
> done | ip -batch -

Yes.  The interesting story here is that the bottleneck before these
patches was the ll_init_map function of iproute2, which resulted in
over an order of magnitude slowdown when starting iproute on a system
with lots of network devices.

It is still unclear where iproute comes into the picture in the original
problem scenario of creating 2000 containers each with 2 veth pairs.
But apparently it was.

As the fundamental use case here was taking 2000 separate independent
actions it turns out to be important for things to not slowdown
unreasonably outside of batch mode.  So I was explicitly testing the
non-batch mode performance.

On the flip side it might be interesting to see if we can get batch mode
deletes to batch in the kernel, so we don't have to wait through
synchronize_rcu_expedited for each of them.  Although for the container
case I can just drop the last reference to the network namespace and all
of the network device removals will batch.

Ultimately shrug.  Except in the previous O(N^2) userspace behavior
there don't seem to be any practical performance problems with this many
network devices.  What is interesting is that this many network devices
is becoming interesting on inexpensive COTS servers, for cases that are
not purely network focused.

Eric

^ permalink raw reply

* Re: [PATCH net-next] core: simplify the getting percpu of flow_cache
From: RongQing Li @ 2013-03-29  1:07 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Christoph Lameter
In-Reply-To: <1364476527.15753.38.camel@edumazet-glaptop>

2013/3/28 Eric Dumazet <eric.dumazet@gmail.com>:
> On Thu, 2013-03-28 at 20:24 +0800, roy.qing.li@gmail.com wrote:
>> From: Li RongQing <roy.qing.li@gmail.com>
>>
>> replace per_cpu with per_cpu_ptr to save conversion between address and pointer
>>
>> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
>> ---
>>  net/core/flow.c |    2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/net/core/flow.c b/net/core/flow.c
>> index 7fae135..707fb7b 100644
>> --- a/net/core/flow.c
>> +++ b/net/core/flow.c
>> @@ -334,7 +334,7 @@ static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu)
>>       struct flow_cache_percpu *fcp;
>>       int i;
>>
>> -     fcp = &per_cpu(*fc->percpu, cpu);
>> +     fcp = per_cpu_ptr(fc->percpu, cpu);
>>       for (i = 0; i < flow_cache_hash_size(fc); i++)
>>               if (!hlist_empty(&fcp->hash_table[i]))
>>                       return 0;
>
>
> This makes no difference at all, at least on x86
>
> Care to elaborate ?
>
I think it behaves the same as per_cpu, but it makes the code easier to read
by dropping the & before per_cpu() and the * before fc->percpu, and other places
in flow.c are already using per_cpu_ptr(fc->percpu, cpu)

-Roy

^ permalink raw reply

* Transfer proposal
From: Ramos Brontons @ 2013-03-28 23:57 UTC (permalink / raw)


Dear Friend,
I got your email address from your country directory.
I have a transfer proposal worth fourteen millions five hundred US dollars 
(USD14,500, 000).  For more details and clarification,please reply my email or 
call me.

Regards,
Barrister Ramon Brontons
Tel: +44 7031975999
Email: b_brontonss@hotmail.com

^ permalink raw reply

* Re: [RFC][PATCH] iproute: Faster ip link add, set and delete
From: Eric Dumazet @ 2013-03-29  1:10 UTC (permalink / raw)
  To: Eric W. Biederman
  Cc: Stephen Hemminger, Benoit Lourdelet, Serge Hallyn,
	netdev@vger.kernel.org
In-Reply-To: <1364517837.15753.61.camel@edumazet-glaptop>

On Thu, 2013-03-28 at 17:43 -0700, Eric Dumazet wrote:

> In batch mode, I can create these veth pairs in 4 seconds
> 
> for i in $(seq 1 5000) ; do echo link add a$i type veth peer name b$i;
> done | ip -batch -


At rmmod time, 30% of cpu is spent in packet_notifier()

Maybe we can do something about this.

   30.85%         rmmod  [kernel.kallsyms]     [k]
packet_notifier                                                                                 
                   |
                   --- packet_notifier
                       notifier_call_chain
                       raw_notifier_call_chain
                       call_netdevice_notifiers
                       rollback_registered_many
                       unregister_netdevice_many
                       __rtnl_link_unregister
                       rtnl_link_unregister
                       0xffffffffa0044868
                       sys_delete_module
                       sysenter_dispatch

^ permalink raw reply

* Re: [PATCH] core: fix the use of this_cpu_ptr
From: RongQing Li @ 2013-03-29  1:24 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Eric Dumazet, Shan Wei, netdev
In-Reply-To: <0000013db16f1e1d-abcb7d9e-1c9d-4ef9-b4de-767bc0282ccf-000000@email.amazonses.com>

2013/3/28 Christoph Lameter <cl@linux.com>:
> On Thu, 28 Mar 2013, Eric Dumazet wrote:
>
>> > flush_tasklet is not percpu var, and percpu is percpu var, and
>> >     this_cpu_ptr(&info->cache->percpu->flush_tasklet)
>> > is not equal to
>> >     &this_cpu_ptr(info->cache->percpu)->flush_tasklet
>
> &this_cpu_ptr is always an error since you are taking the address of an
> address.
>

&this_cpu_ptr()->flush_tasklet,   "->" has higher precedence than "&"
so the result is the same as
 &(this_cpu_ptr()->flush_tasklet)
so it should not be an issue.

flush_tasklet is not a percpu var, it is a member of percpu var.

-Roy

> this_cpu_ptr(&structure) is the right way to get the address of the cpu
> instance for this cpu for a per cpu structure.
>
>> Christoph, could this kind of error be detected by the compiler or
>> sparse ?
>
> The per cpu variables are marked with __percpu. This should be detected by
> sparse.
>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox