Netdev List

Netdev List
 help / color / mirror / Atom feed

* [PATCH (net.git) 3/3 (v3)] stmmac: fix driver Kconfig when built as module
From: Giuseppe CAVALLARO @ 2012-06-05  5:22 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro, Rayagond Kokatanur
In-Reply-To: <1338873777-5374-1-git-send-email-peppe.cavallaro@st.com>

This patch fixes the driver when built as a dynamic module.
In fact, the platform part cannot be built and the probe fails
(thanks to Bob Liu, who reported this bug).

v2: as D. Miller suggested, it is not necessary to make the
pci and the platform code mutually exclusive.
Having both could also help, at build time, to verify that
all the code is validated and compiles fine.

v3: removed wrong Reviewed-by from the patch

Reported-by: Bob Liu <lliubbo@gmail.com>
cc: Rayagond Kokatanur <rayagond@vayavyalabs.com>
Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |    5 +--
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    3 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   25 +++++++++++++++++
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c   |   29 +-------------------
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |    4 +--
 5 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 0364283..0076f77 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -13,9 +13,8 @@ config STMMAC_ETH
 if STMMAC_ETH
 
 config STMMAC_PLATFORM
-	tristate "STMMAC platform bus support"
+	bool "STMMAC Platform bus support"
 	depends on STMMAC_ETH
-	default y
 	---help---
 	  This selects the platform specific bus support for
 	  the stmmac device driver. This is the driver used
@@ -26,7 +25,7 @@ config STMMAC_PLATFORM
 	  If unsure, say N.
 
 config STMMAC_PCI
-	tristate "STMMAC support on PCI bus (EXPERIMENTAL)"
+	bool "STMMAC PCI bus support (EXPERIMENTAL)"
 	depends on STMMAC_ETH && PCI && EXPERIMENTAL
 	---help---
 	  This is to select the Synopsys DWMAC available on PCI devices,
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 6b5d060..6d07ba2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -95,7 +95,8 @@ extern int stmmac_mdio_register(struct net_device *ndev);
 extern void stmmac_set_ethtool_ops(struct net_device *netdev);
 extern const struct stmmac_desc_ops enh_desc_ops;
 extern const struct stmmac_desc_ops ndesc_ops;
-
+extern struct pci_driver stmmac_pci_driver;
+extern struct platform_driver stmmac_pltfr_driver;
 int stmmac_freeze(struct net_device *ndev);
 int stmmac_restore(struct net_device *ndev);
 int stmmac_resume(struct net_device *ndev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ea33eae..3638569 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -42,6 +42,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/prefetch.h>
+#include <linux/pci.h>
 #ifdef CONFIG_STMMAC_DEBUG_FS
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -2093,6 +2094,30 @@ int stmmac_restore(struct net_device *ndev)
 }
 #endif /* CONFIG_PM */
 
+static int __init stmmac_init(void)
+{
+	int err = 0;
+
+	err = platform_driver_register(&stmmac_pltfr_driver);
+
+	if (!err) {
+		err = pci_register_driver(&stmmac_pci_driver);
+		if (err)
+			platform_driver_unregister(&stmmac_pltfr_driver);
+	}
+
+	return err;
+}
+
+static void __exit stmmac_exit(void)
+{
+	pci_unregister_driver(&stmmac_pci_driver);
+	platform_driver_unregister(&stmmac_pltfr_driver);
+}
+
+module_init(stmmac_init);
+module_exit(stmmac_exit);
+
 #ifndef MODULE
 static int __init stmmac_cmdline_opt(char *str)
 {
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 58fab53..cf826e6 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -179,7 +179,7 @@ static DEFINE_PCI_DEVICE_TABLE(stmmac_id_table) = {
 
 MODULE_DEVICE_TABLE(pci, stmmac_id_table);
 
-static struct pci_driver stmmac_driver = {
+struct pci_driver stmmac_pci_driver = {
 	.name = STMMAC_RESOURCE_NAME,
 	.id_table = stmmac_id_table,
 	.probe = stmmac_pci_probe,
@@ -190,33 +190,6 @@ static struct pci_driver stmmac_driver = {
 #endif
 };
 
-/**
- * stmmac_init_module - Entry point for the driver
- * Description: This function is the entry point for the driver.
- */
-static int __init stmmac_init_module(void)
-{
-	int ret;
-
-	ret = pci_register_driver(&stmmac_driver);
-	if (ret < 0)
-		pr_err("%s: ERROR: driver registration failed\n", __func__);
-
-	return ret;
-}
-
-/**
- * stmmac_cleanup_module - Cleanup routine for the driver
- * Description: This function is the cleanup routine for the driver.
- */
-static void __exit stmmac_cleanup_module(void)
-{
-	pci_unregister_driver(&stmmac_driver);
-}
-
-module_init(stmmac_init_module);
-module_exit(stmmac_cleanup_module);
-
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PCI driver");
 MODULE_AUTHOR("Rayagond Kokatanur <rayagond.kokatanur@vayavyalabs.com>");
 MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 3dd8f08..680d2b8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -255,7 +255,7 @@ static const struct of_device_id stmmac_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, stmmac_dt_ids);
 
-static struct platform_driver stmmac_driver = {
+struct platform_driver stmmac_pltfr_driver = {
 	.probe = stmmac_pltfr_probe,
 	.remove = stmmac_pltfr_remove,
 	.driver = {
@@ -266,8 +266,6 @@ static struct platform_driver stmmac_driver = {
 		   },
 };
 
-module_platform_driver(stmmac_driver);
-
 MODULE_DESCRIPTION("STMMAC 10/100/1000 Ethernet PLATFORM driver");
 MODULE_AUTHOR("Giuseppe Cavallaro <peppe.cavallaro@st.com>");
 MODULE_LICENSE("GPL");
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH (net.git) 2/3] stmmac: update driver's doc
From: Giuseppe CAVALLARO @ 2012-06-05  5:22 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1338873777-5374-1-git-send-email-peppe.cavallaro@st.com>

Fixed the driver's documentation that was obsolete and didn't
report new platform fields (recently added).

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 Documentation/networking/stmmac.txt |   44 +++++++++++++++++++---------------
 1 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/Documentation/networking/stmmac.txt b/Documentation/networking/stmmac.txt
index ab1e8d7..5cb9a19 100644
--- a/Documentation/networking/stmmac.txt
+++ b/Documentation/networking/stmmac.txt
@@ -10,8 +10,8 @@ Currently this network device driver is for all STM embedded MAC/GMAC
 (i.e. 7xxx/5xxx SoCs), SPEAr (arm), Loongson1B (mips) and XLINX XC2V3000
 FF1152AMT0221 D1215994A VIRTEX FPGA board.
 
-DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether MAC 10/100
-Universal version 4.0 have been used for developing this driver.
+DWC Ether MAC 10/100/1000 Universal version 3.60a (and older) and DWC Ether
+MAC 10/100 Universal version 4.0 have been used for developing this driver.
 
 This driver supports both the platform bus and PCI.
 
@@ -54,27 +54,27 @@ net_device structure enabling the scatter/gather feature.
 When one or more packets are received, an interrupt happens. The interrupts
 are not queued so the driver has to scan all the descriptors in the ring during
 the receive process.
-This is based on NAPI so the interrupt handler signals only if there is work to be
-done, and it exits.
+This is based on NAPI so the interrupt handler signals only if there is work
+to be done, and it exits.
 Then the poll method will be scheduled at some future point.
 The incoming packets are stored, by the DMA, in a list of pre-allocated socket
 buffers in order to avoid the memcpy (Zero-copy).
 
 4.3) Timer-Driver Interrupt
-Instead of having the device that asynchronously notifies the frame receptions, the
-driver configures a timer to generate an interrupt at regular intervals.
-Based on the granularity of the timer, the frames that are received by the device
-will experience different levels of latency. Some NICs have dedicated timer
-device to perform this task. STMMAC can use either the RTC device or the TMU
-channel 2  on STLinux platforms.
+Instead of having the device that asynchronously notifies the frame receptions,
+the driver configures a timer to generate an interrupt at regular intervals.
+Based on the granularity of the timer, the frames that are received by the
+device will experience different levels of latency. Some NICs have dedicated
+timer device to perform this task. STMMAC can use either the RTC device or the
+TMU channel 2  on STLinux platforms.
 The timers frequency can be passed to the driver as parameter; when change it,
 take care of both hardware capability and network stability/performance impact.
-Several performance tests on STM platforms showed this optimisation allows to spare
-the CPU while having the maximum throughput.
+Several performance tests on STM platforms showed this optimisation allows to
+spare the CPU while having the maximum throughput.
 
 4.4) WOL
-Wake up on Lan feature through Magic and Unicast frames are supported for the GMAC
-core.
+Wake up on Lan feature through Magic and Unicast frames are supported for the
+GMAC core.
 
 4.5) DMA descriptors
 Driver handles both normal and enhanced descriptors. The latter has been only
@@ -106,7 +106,8 @@ Several driver's information can be passed through the platform
 These are included in the include/linux/stmmac.h header file
 and detailed below as well:
 
- struct plat_stmmacenet_data {
+struct plat_stmmacenet_data {
+	char *phy_bus_name;
 	int bus_id;
 	int phy_addr;
 	int interface;
@@ -124,19 +125,24 @@ and detailed below as well:
 	void (*bus_setup)(void __iomem *ioaddr);
 	int (*init)(struct platform_device *pdev);
 	void (*exit)(struct platform_device *pdev);
+	void *custom_cfg;
+	void *custom_data;
 	void *bsp_priv;
  };
 
 Where:
+ o phy_bus_name: phy bus name to attach to the stmmac.
  o bus_id: bus identifier.
  o phy_addr: the physical address can be passed from the platform.
 	    If it is set to -1 the driver will automatically
 	    detect it at run-time by probing all the 32 addresses.
  o interface: PHY device's interface.
  o mdio_bus_data: specific platform fields for the MDIO bus.
- o pbl: the Programmable Burst Length is maximum number of beats to
+ o dma_cfg: internal DMA parameters
+   o pbl: the Programmable Burst Length is maximum number of beats to
        be transferred in one DMA transaction.
        GMAC also enables the 4xPBL by default.
+   o fixed_burst/mixed_burst/burst_len
  o clk_csr: fixed CSR Clock range selection.
  o has_gmac: uses the GMAC core.
  o enh_desc: if sets the MAC will use the enhanced descriptor structure.
@@ -160,8 +166,9 @@ Where:
 	     this is sometime necessary on some platforms (e.g. ST boxes)
 	     where the HW needs to have set some PIO lines or system cfg
 	     registers.
- o custom_cfg: this is a custom configuration that can be passed while
-	      initialising the resources.
+ o custom_cfg/custom_data: this is a custom configuration that can be passed
+			   while initialising the resources.
+ o bsp_priv: another private poiter.
 
 For MDIO bus The we have:
 
@@ -180,7 +187,6 @@ Where:
  o irqs: list of IRQs, one per PHY.
  o probed_phy_irq: if irqs is NULL, use this for probed PHY.
 
-
 For DMA engine we have the following internal fields that should be
 tuned according to the HW capabilities.
 
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH (net.git) 1/3] stmmac: fix driver's doc when run kernel-doc script
From: Giuseppe CAVALLARO @ 2012-06-05  5:22 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro
In-Reply-To: <1338873777-5374-1-git-send-email-peppe.cavallaro@st.com>

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |    7 +++++--
 1 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 7096633..ea33eae 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -833,8 +833,9 @@ static u32 stmmac_get_synopsys_id(struct stmmac_priv *priv)
 
 /**
  * stmmac_selec_desc_mode
- * @dev : device pointer
- * Description: select the Enhanced/Alternate or Normal descriptors */
+ * @priv : private structure
+ * Description: select the Enhanced/Alternate or Normal descriptors
+ */
 static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
 {
 	if (priv->plat->enh_desc) {
@@ -1861,6 +1862,8 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 /**
  * stmmac_dvr_probe
  * @device: device pointer
+ * @plat_dat: platform data pointer
+ * @addr: iobase memory address
  * Description: this is the main probe function used to
  * call the alloc_etherdev, allocate the priv structure.
  */
-- 
1.7.4.4

^ permalink raw reply related

* [PATCH (net.git) 0/3 (v3)] stmmac fixes for net.git
From: Giuseppe CAVALLARO @ 2012-06-05  5:22 UTC (permalink / raw)
  To: netdev; +Cc: Giuseppe Cavallaro

These patches fix a problem in the driver when built as dynamic
module and fix the driver's documentation.

v2: this patchset has the same patches I had sent before but
    I removed a patch that did a cleanup (now moved for net-next).

v3: removed wrong reviewed-by from this patch:
    "stmmac: fix driver Kconfig when built as module"

Giuseppe Cavallaro (3):
  stmmac: fix driver's doc when run kernel-doc script
  stmmac: update driver's doc
  stmmac: fix driver Kconfig when built as module

 Documentation/networking/stmmac.txt                |   44 +++++++++++--------
 drivers/net/ethernet/stmicro/stmmac/Kconfig        |    5 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac.h       |    3 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |   32 +++++++++++++-
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c   |   29 +------------
 .../net/ethernet/stmicro/stmmac/stmmac_platform.c  |    4 +-
 6 files changed, 61 insertions(+), 56 deletions(-)

-- 
1.7.4.4

^ permalink raw reply

* Re: [PATCH (net.git) 3/3] stmmac: fix driver Kconfig when built as module
From: Giuseppe CAVALLARO @ 2012-06-05  5:16 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: netdev, Rayagond Kokatanur
In-Reply-To: <1338828328.3979.208.camel@deadeye>

On 6/4/2012 6:45 PM, Ben Hutchings wrote:
> On Mon, 2012-06-04 at 18:32 +0200, Giuseppe CAVALLARO wrote:
>> This patches fixes the driver when built as dyn module.
>> In fact the platform part cannot be built and the probe fails
>> (thanks to Bob Liu that reported this bug).
>>
>> v2: as D. Miller suggested, it is not necessary to make the
>> pci and the platform code mutually exclusive.
>> Having both could also help, at built time ,to verify that
>> all the code is validated and compiles fine.
>>
>> Reported-by: Bob Liu <lliubbo@gmail.com>
>> Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
>> Reviewed-by: Ben Hutchings <bhutchings@solarflare.com>
> [...]
> 
> What I said in <1337687765.11796.11.camel@deadeye> was:
> 
> That's because CONFIG_STMMAC_PLATFORM and CONFIG_STMMAC_PCI are wrongly
> declared as tristate in Kconfig.  Change them to bool and it should work
> again.
> 
> You can't add a 'Reviewed-by' line on the basis of this; you can only
> add it if a reviewer writes that line themselves (and if you don't make
> further changes).

Thanks, I'm sending the patches again and sorry for the wrong usage of
Reviewed-by.

Peppe

> 
> Ben.
> 

^ permalink raw reply

* Re: [PATCH net-next 0/3] Remove casts to same type
From: Joe Perches @ 2012-06-05  4:54 UTC (permalink / raw)
  To: Jim Cromie; +Cc: David S. Miller, netdev
In-Reply-To: <CAJfuBxzVLzQK_d=JqJVzQnzHWZDtAJMihkEXWh7xeimMq9FHxQ@mail.gmail.com>

On Mon, 2012-06-04 at 22:48 -0600, Jim Cromie wrote:
> On Mon, Jun 4, 2012 at 4:44 PM, Joe Perches <joe@perches.com> wrote:
> > Adding casts of objects to the same type is unnecessary
> > and confusing for a human reader.
> > Remove them via coccinelle script.
> Can you also post the coccinelle script ?

Hi Jim.

I did that with each patch, but here it is again:

$ cat norecast.cocci
@@
type T;
T *p;
@@

-       (T *)p
+       p
$

> did you have to edit them any futher, or is it 100% script ?

Also from each patch:

I manually removed the conversions this script produces of
casts with __force, __iomem and __user.

Also, I did delete a few unnecessary parentheses left over
after the casts were removed.

^ permalink raw reply

* Re: [PATCH net-next 0/3] Remove casts to same type
From: Jim Cromie @ 2012-06-05  4:48 UTC (permalink / raw)
  To: Joe Perches; +Cc: David S. Miller, netdev
In-Reply-To: <cover.1338849364.git.joe@perches.com>

On Mon, Jun 4, 2012 at 4:44 PM, Joe Perches <joe@perches.com> wrote:
> Adding casts of objects to the same type is unnecessary
> and confusing for a human reader.
>
> Remove them via coccinelle script.
>

Can you also post the coccinelle script ?

did you have to edit them any further, or is it 100% script?

^ permalink raw reply

* Re: [PATCH IPROUTE2] tc-fq_codel: Add manpage
From: Vijay Subramanian @ 2012-06-05  4:06 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Stephen Hemminger, Dave Taht
In-Reply-To: <1338865291.2760.1975.camel@edumazet-glaptop>

> Thanks !
>
> Could you also update man/man8/tc.8 ?
>

Sure.  I will send a patch.

Thanks,
Vijay

^ permalink raw reply

* RE: [PATCH RFC] c_can_pci: generic module for c_can on PCI
From: Bhupesh SHARMA @ 2012-06-05  3:42 UTC (permalink / raw)
  To: Federico Vaga, Alan Cox
  Cc: Wolfgang Grandegger, Marc Kleine-Budde, Giancarlo ASNAGHI,
	Alan Cox, Alessandro Rubini, linux-can@vger.kernel.org,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1677842.Pq7naXsvrI@harkonnen>

> -----Original Message-----
> From: linux-can-owner@vger.kernel.org [mailto:linux-can-
> owner@vger.kernel.org] On Behalf Of Federico Vaga
> Sent: Monday, June 04, 2012 10:16 PM
> To: Alan Cox
> Cc: Wolfgang Grandegger; Marc Kleine-Budde; Giancarlo ASNAGHI; Alan
> Cox; Alessandro Rubini; linux-can@vger.kernel.org;
> netdev@vger.kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH RFC] c_can_pci: generic module for c_can on PCI
> 
> > > +static u16 c_can_pci_read_reg_aligned_to_16bit(struct c_can_priv
> > > *priv, +						void *reg)
> >
> > I'm a bit worried this function name might be too short ;)
> 
> I know :) I was inspired by the same function in c_can_platform.c

There was a purpose to keeping these long function names when I wrote
the c_can_platform driver initially. These were kept to support the 
SoCs (even the flaky ones) which I could trace at that time and used C_CAN controllers
(e.g. Hynix, ST's SPEAr eMPUs, etc..) and had different register bank layouts.

In some of these SoC's the C_CAN registers which are essentially 16-bit or 32-bit
registers are aligned always to a 32-bit boundary (i.e. even a 16-bit register
is aligned to 32-bit boundary).

So, I had to implement two variants of the read/write reg routines. I am not sure your SoC implementation needs them.
If it does, I will categorize it as flaky as well :)

> About these function I suggest to move them into c_can.c because they
> are the same for c_can_platform.c and c_can_pci.c Then add a new field
> c_can_priv->offset which can be used to shift the register offset
> coherently with the memory alignment. Finally, remove c_can_priv-
> >read_reg and c_can_priv->write_reg and use internal c_can.c function
> to
> read and write registers.

See above. There was a reason for keeping these routines in c_can_platform.c
Simply put, every platform having a Bosch C_CAN module can have its own implementation
of the bus (for example you use PCI) and register bank layout (16-bit or 32-bit aligned).

I would suggest to keep the same arrangement.

> static u16 c_can_read_reg(struct c_can_priv *priv, enum reg index)
> {
> 	return readw(priv->base + (priv->regs[index] << priv->offset));
> }
> static void c_can_write_reg(struct c_can_priv *priv, enum reg index,
> 						u16 val)
> {
> 	writew(val, priv->base + (priv->regs[index] << priv->offset));
> }
> 
> 
> If it's ok, I can made a patch for this in the next days.
> 

[snip..]

Regards,
Bhupesh

^ permalink raw reply

* Re: [PATCH net-next 1/2] inetpeer: add namespace support for inetpeer
From: Eric Dumazet @ 2012-06-05  3:40 UTC (permalink / raw)
  To: Gao feng; +Cc: herbert, steffen.klassert, davem, netdev, containers
In-Reply-To: <1338863012-4902-1-git-send-email-gaofeng@cn.fujitsu.com>

On Tue, 2012-06-05 at 10:23 +0800, Gao feng wrote:
> now inetpeer doesn't support namespace,the information will
> be leaking across namespace.
> 
> this patch move the global vars v4_peers and v6_peers to
> netns_ipv4 and netns_ipv6 as a field peers.
> 
> add struct pernet_operations inetpeer_ops to initial pernet
> inetpeer data.
> 
> and change family_to_base and inet_getpeer to support namespace.
> 
> Signed-off-by: Gao feng <gaofeng@cn.fujitsu.com>
> ---
>  include/net/inetpeer.h   |   10 +++++---
>  include/net/netns/ipv4.h |    1 +
>  include/net/netns/ipv6.h |    1 +
>  net/ipv4/inetpeer.c      |   54 +++++++++++++++++++++++++++++++++++++++------
>  net/ipv4/route.c         |    2 +-
>  5 files changed, 55 insertions(+), 13 deletions(-)
> 
> diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
> index b94765e..4a50449 100644
> --- a/include/net/inetpeer.h
> +++ b/include/net/inetpeer.h
> @@ -72,7 +72,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
>  }
>  
>  /* can be called with or without local BH being disabled */
> -struct inet_peer	*inet_getpeer(const struct inetpeer_addr *daddr, int create);
> +struct inet_peer *inet_getpeer(struct net *net,
> +			       const struct inetpeer_addr *daddr,
> +			       int create);
>  
>  static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
>  {
> @@ -80,7 +82,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
>  
>  	daddr.addr.a4 = v4daddr;
>  	daddr.family = AF_INET;
> -	return inet_getpeer(&daddr, create);
> +	return inet_getpeer(&init_net, &daddr, create);
>  }
>  
>  static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
> @@ -89,14 +91,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
>  
>  	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
>  	daddr.family = AF_INET6;
> -	return inet_getpeer(&daddr, create);
> +	return inet_getpeer(&init_net, &daddr, create);
>  }
>  
>  /* can be called from BH context or outside */
>  extern void inet_putpeer(struct inet_peer *p);
>  extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
>  
> -extern void inetpeer_invalidate_tree(int family);
> +extern void inetpeer_invalidate_tree(struct net *net, int family);
>  
>  /*
>   * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index bbd023a..0855e09 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -31,6 +31,7 @@ struct netns_ipv4 {
>  	struct sock		**icmp_sk;
>  	struct sock		*tcp_sock;
>  
> +	struct inet_peer_base	*peers;
>  	struct netns_frags	frags;
>  #ifdef CONFIG_NETFILTER
>  	struct xt_table		*iptable_filter;
> diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
> index b42be53..df0a545 100644
> --- a/include/net/netns/ipv6.h
> +++ b/include/net/netns/ipv6.h
> @@ -33,6 +33,7 @@ struct netns_ipv6 {
>  	struct netns_sysctl_ipv6 sysctl;
>  	struct ipv6_devconf	*devconf_all;
>  	struct ipv6_devconf	*devconf_dflt;
> +	struct inet_peer_base	*peers;
>  	struct netns_frags	frags;
>  #ifdef CONFIG_NETFILTER
>  	struct xt_table		*ip6table_filter;
> diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
> index d4d61b6..dafb8b0 100644
> --- a/net/ipv4/inetpeer.c
> +++ b/net/ipv4/inetpeer.c
> @@ -90,13 +90,11 @@ struct inet_peer_base {
>  
>  static struct inet_peer_base v4_peers = {
>  	.root		= peer_avl_empty_rcu,
> -	.lock		= __SEQLOCK_UNLOCKED(v4_peers.lock),
>  	.total		= 0,
>  };
>  

Please remove v4_peers & v6_peers

>  static struct inet_peer_base v6_peers = {
>  	.root		= peer_avl_empty_rcu,
> -	.lock		= __SEQLOCK_UNLOCKED(v6_peers.lock),
>  	.total		= 0,
>  };
>  
> @@ -153,6 +151,41 @@ static void inetpeer_gc_worker(struct work_struct *work)
>  	schedule_delayed_work(&gc_work, gc_delay);
>  }
>  
> +static int __net_init inetpeer_net_init(struct net *net)
> +{
> +
> +	net->ipv4.peers = kmemdup(&v4_peers,
> +				  sizeof(v4_peers),
> +				  GFP_KERNEL);

kzalloc(), and init ->root to peer_avl_empty_rcu

> +	if (net->ipv4.peers == NULL)
> +		return -1;
> +	seqlock_init(&net->ipv4.peers->lock);
> +	net->ipv6.peers = kmemdup(&v6_peers,
> +				  sizeof(v6_peers),
> +				  GFP_KERNEL);

kzalloc(), and init ->root to peer_avl_empty_rcu

> +	if (net->ipv6.peers == NULL)
> +		goto out_ipv6;
> +	seqlock_init(&net->ipv6.peers->lock);
> +	return 0;
> +out_ipv6:
> +	kfree(net->ipv4.peers);
> +	return -1;
> +}
> +

^ permalink raw reply

* Re: [ovs-dev] [PATCH 01/21] datapath: tunnelling: Replace tun_id with tun_key
From: Jesse Gross @ 2012-06-05  3:33 UTC (permalink / raw)
  To: Simon Horman; +Cc: dev, netdev
In-Reply-To: <20120604223413.GF28747@verge.net.au>

On Tue, Jun 5, 2012 at 7:34 AM, Simon Horman <horms@verge.net.au> wrote:
> On Sun, Jun 03, 2012 at 06:15:04PM +0900, Jesse Gross wrote:
>> On Thu, May 24, 2012 at 6:08 PM, Simon Horman <horms@verge.net.au> wrote:
>> > @@ -1204,15 +1210,21 @@ int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port, __be64 *tun_id,
>> >  int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
>> >  {
>> >        struct ovs_key_ethernet *eth_key;
>> > +       struct ovs_key_ipv4_tunnel *tun_key;
>> >        struct nlattr *nla, *encap;
>> >
>> >        if (swkey->phy.priority &&
>> >            nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority))
>> >                goto nla_put_failure;
>> >
>> > -       if (swkey->phy.tun_id != cpu_to_be64(0) &&
>> > -           nla_put_be64(skb, OVS_KEY_ATTR_TUN_ID, swkey->phy.tun_id))
>> > -               goto nla_put_failure;
>> > +       if (swkey->phy.tun_key.ipv4_dst) {
>>
>> It's probably OK to use DIP equal to zero as a not present marker but
>> we need to enforce that it's always true - for example we shouldn't
>> allow somebody to setup a flow that way or receive packets with a zero
>> address.  Alternately, we may be able to find a spare bit to indicate
>> this, like is done with vlans.
>
> When I originally wrote this there didn't seem to be any obvious
> place in ovs_key_ipv4_tunnel to have an active/inactive bit, which
> is in part why the code relies on checking DIP.
>
> However, more recent versions of ovs_key_ipv4_tunnel have a flags field of
> which only one bit is currently used. We could use one of the unused flag
> bits.

I guess it depends on what we end up doing with the lookup struct.  If
it stays as it is today, there's plenty of space if you include those
padding bytes.  If we shrink it down and there isn't a place then I do
think it is fine to use DIP (since this is traversing an IP stack and
DIP = 0 is an invalid value it's not like an L2 switch not allowing
invalid IP packet).  In that case, we just need to do more validation
in other places to make sure that this is the only situation that the
condition can arise.

>> In any case, I think we need to do some additional validation when
>> setting up flows to check reserved space, for example, as otherwise
>> that will never match.
>
> Sure. My testing seems to indicate that matching does occur,
> though I am quite happy to tighten things up.

I don't think it causes a problem as long as userspace is well
behaved; I was thinking it's best to detect problems early.

>> In a similar vein, struct ovs_key_ipv4_tunnel contains some fields
>> that I think can never apply for lookup such as the flags so it would
>> be nice if we could remove that for lookup.
>
> I think they need to be there to be passed around, so I'm not
> sure if they can easily be removed from ovs_key_ipv4_tunnel if that
> is what you are asking.

My guess is that we'll probably want to separate out the struct that
is used for lookup from the one that is used for communication with
userspace, which is what we do for most things so that we have more
freedom to optimize in the kernel.

>> >  bool ovs_tnl_frag_needed(struct vport *vport,
>> >                         const struct tnl_mutable_config *mutable,
>> > -                        struct sk_buff *skb, unsigned int mtu, __be64 flow_key)
>> > +                        struct sk_buff *skb, unsigned int mtu,
>> > +                        struct ovs_key_ipv4_tunnel *tun_key)
>> >  {
>> >        unsigned int eth_hdr_len = ETH_HLEN;
>> >        unsigned int total_length = 0, header_length = 0, payload_length;
>> >        struct ethhdr *eh, *old_eh = eth_hdr(skb);
>> >        struct sk_buff *nskb;
>> > +       struct ovs_key_ipv4_tunnel ntun_key;
>> >
>> >        /* Sanity check */
>> >        if (skb->protocol == htons(ETH_P_IP)) {
>> > @@ -705,8 +707,10 @@ bool ovs_tnl_frag_needed(struct vport *vport,
>> >         * any way of synthesizing packets.
>> >         */
>> >        if ((mutable->flags & (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) ==
>> > -           (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION))
>> > -               OVS_CB(nskb)->tun_id = flow_key;
>> > +           (TNL_F_IN_KEY_MATCH | TNL_F_OUT_KEY_ACTION)) {
>> > +               ntun_key = *tun_key;
>> > +               OVS_CB(nskb)->tun_key = &ntun_key;
>> > +       }
>>
>> I guess this is probably where you were going to use the function to
>> reverse IP addresses.  The logic doesn't really work but it's moot
>> since this is going away anyways.
>
> My latest series includes a clean up to ovs_tnl_frag_needed() to allow
> it to work in some circumstances - i.e. those found in my test environment.
> That series removes knowledge of tun_key from ovs_tnl_frag_needed().
>
> I am however happy to remove ovs_tnl_frag_needed() completely if you think
> that is appropriate.

I think, in retrospect, that the path MTU discovery that I implemented
here was probably not the right choice and MSS clamping is the correct
way to do things.  It was better when it wasn't possible to do any
kind of flow-based manipulation of tunnels but the model is breaking
down more and more over time.  Given that I would be hesitant to
submit it upstream and since that's the goal of this work, removing it
completely is probably the right thing to do.

^ permalink raw reply

* Re: [RFC PATCH v1 2/3] net: add VEPA, VEB bridge mode
From: Krishna Kumar2 @ 2012-06-05  3:11 UTC (permalink / raw)
  To: John Fastabend
  Cc: bhutchings, buytenh, eilong, eric.w.multanen, gregory.v.rose,
	hadi, jeffrey.t.kirsher, mst, netdev, shemminger, sri
In-Reply-To: <4FCCE46C.7080809@intel.com>

John Fastabend <john.r.fastabend@intel.com> wrote on 06/04/2012 10:08:04
PM:

> > I think you should do something like:
> >
> >         if ((flags == BRIDGE_FLAGS_MASTER) && ...)
> >                 ...
> >
> > Also you could use BRIDGE_FLAGS_MASTER=1, SELF=2, and use
> > "if (flags & BRIDGE_FLAGS_MASTER)" for consistency?
>
> OK this is likely a good thing otherwise user space is a
> bit tedious when managing FDB and bridge modes. We do still
> need the !flags case to support existing applications though,
> (we must maintain existing semantics)
>
> if (!flags || (flags & BRIDGE_FLAGS_MASTER) && ...)
>    ...
> else if (flags & BRIDGE_FLAGS_SELF)
>    ...

Yes, looks good.

> > It is possible to return a reporting error even though
> > the operation succeeded. Maybe something that could be
> > done here to indicate that the operation succeeded, or
> > is that a TODO?
> >
>
> The problem is if rtnl_bridge_notify fails due to memory
> constraints or otherwise. In this case the set has already
> completed successfully as you note so we should not return
> any error. This should fix it if I understand your concern
> correctly.
>
>    if (!err)
>       rtnl_bridge_notify(dev, flags);
>    return err;

Yes. I guess user will not hang waiting for a response as it
will pass NLM_F_ACK, which allows netlink_rcv_skb to call
netlink_ack.

Thanks,
- KK

^ permalink raw reply

* Wireless networking without CONFIG_PM..
From: Linus Torvalds @ 2012-06-05  3:09 UTC (permalink / raw)
  To: John W. Linville, Johannes Berg, David S. Miller; +Cc: linux-wireless, netdev

I wonder if anybody has really ever tested that? Because I think it's broken..

In particular, I made the mistake of not enabling CONFIG_PM on a new
laptop, and it caused some *seriously* nasty-to-debug problems. The
mac80211 code goes crazy, that upsets the wireless driver, and then
the wireless driver in question had a nasty bug where it would
double-release its firmware, which then caused vmalloc corruption and
all kinds of really odd recursive panics etc.

And as far as I can tell, the root cause of the problem is a bad
choice in net/mac80211/main.c:

  int ieee80211_register_hw(struct ieee80211_hw *hw)
  {
  ...
        if ((hw->wiphy->wowlan.flags || hw->wiphy->wowlan.n_patterns)
  #ifdef CONFIG_PM
            && (!local->ops->suspend || !local->ops->resume)
  #endif
            )
                return -EINVAL;

which means that if the wiphy says it supports wake-on-wireless lan,
and CONFIG_PM isn't enabled, we'll return -EINVAL and will refuse to
do any wireless at all.

Isn't that a bit extreme? Or outright stupid? What is the advantage of
saying that "if you don't have CONFIG_PM enabled, we'll just refuse to
register any hardware that talks about its wake-on-wireless
patterns"?

Maybe there is some reason for that return -EINVAL, but I don't think
it makes sense with that particular placement of #ifdef CONFIG_PM.
Maybe if the #ifdef was around the whole test? Or maybe it should just
be removed.

Or am I missing some big reason for why it makes sense to do that? Hmm?

I'll make a separate bug-report email to the intel iwlwifi people
about their absolutely horribly broken error handling which then made
it such a disaster, but I thought I'd bring the generic mac80211 issue
up. I don't think there are a lot of drivers that do the whole wowlan
thing, and obviously most people use wireless on laptops where you
want CONFIG_PM anyway, so it probably hasn't triggered very much.

                  Linus

^ permalink raw reply

* Re: [PATCH IPROUTE2] tc-fq_codel: Add manpage
From: Eric Dumazet @ 2012-06-05  3:01 UTC (permalink / raw)
  To: Vijay Subramanian; +Cc: netdev, Stephen Hemminger, Dave Taht
In-Reply-To: <1338850557-5199-1-git-send-email-subramanian.vijay@gmail.com>

On Mon, 2012-06-04 at 15:55 -0700, Vijay Subramanian wrote:
> This patch adds the manpage for the FQ_CoDel (Fair Queuing Controlled-Delay)
> AQM.
> 
> Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
> ---
>  man/man8/Makefile      |    2 +-
>  man/man8/tc-fq_codel.8 |  108 ++++++++++++++++++++++++++++++++++++++++++++++++
>  2 files changed, 109 insertions(+), 1 deletions(-)
>  create mode 100644 man/man8/tc-fq_codel.8

Thanks !

Could you also update man/man8/tc.8 ?

^ permalink raw reply

* Re: [PATCH net-next] net: netdev_alloc_skb() use build_skb()
From: Eric Dumazet @ 2012-06-05  2:50 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Willy Tarreau, David Miller, netdev
In-Reply-To: <20120604212050.GA2139@redhat.com>

On Tue, 2012-06-05 at 00:20 +0300, Michael S. Tsirkin wrote:
> On Mon, Jun 04, 2012 at 09:56:52PM +0200, Eric Dumazet wrote:
> > On Mon, 2012-06-04 at 22:48 +0300, Michael S. Tsirkin wrote:
> > 
> > > If I do this what will truesize be? 128, no?
> > 
> > My example was not correct, since you must have enough room for the
> > SKB_DATA_ALIGN(sizeof(struct skb_shared_info))  ( 320 bytes )
> > 
> > So it would be 128 + 320 = 448
> > 
> 
> 
> Ugh. I forgot about that. shinfo goes into the same page,
> so we'll have to also make all frags shorter by 320
> to leave space for shinfo at tail.
> overall looks like we need hypervisor extensions if
> we want to use build_skb ...

Maybe not.

If you provided a 2048 bytes block and hypervisor filled one (small)
frame, there might be available room at the end of the block for the
shinfo already.

If yes : You can use build_skb()
If not : netdev_alloc_skb_ip_align()

I mentioned this trick for ixgbe driver.

^ permalink raw reply

* Re: [PATCH net-next] net: netdev_alloc_skb() use build_skb()
From: Eric Dumazet @ 2012-06-05  2:46 UTC (permalink / raw)
  To: Michael S. Tsirkin; +Cc: Willy Tarreau, David Miller, netdev
In-Reply-To: <20120604215434.GB3193@redhat.com>

On Tue, 2012-06-05 at 00:54 +0300, Michael S. Tsirkin wrote:

> Yes but what bugs me is that if the box is not under memory pressure
> this overestimation limits buffers for no real gain.
> How about we teach tcp to use data_len for buffer
> limits normally and switch to truesize when low on memory?
> 

You should first have evidence of the effect of this limitation.

I see more evidence of poor choices at driver level than core level.

^ permalink raw reply

* Re: linux-next: Tree for Apr 12
From: Eric Paris @ 2012-06-05  2:42 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Stephen Rothwell, linux-next, LKML, netdev, James Morris,
	Stephen Smalley
In-Reply-To: <1338849977.2860.9.camel@localhost>

On Mon, 2012-06-04 at 18:46 -0400, Eric Paris wrote:
> On Mon, 2012-06-04 at 15:04 -0700, Andrew Morton wrote:
> > On Thu, 12 Apr 2012 14:24:15 -0700
> > Andrew Morton <akpm@linux-foundation.org> wrote:
> > 
> > > On Thu, 12 Apr 2012 14:59:31 +1000
> > > Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> > > 
> > > > I have created today's linux-next tree at
> > > > git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
> > > 
> > > This isn't working for me.  Some time between April 3 and April 12
> > > someone merged something into the non-mm part of linux-next which broke
> > > ssh.
> > > 
> > > I boot the box and everything seems to come up OK, but attempts to ssh
> > > into the machine fail with
> > > 
> > > X11 forwarding request failed on channel 0
> > > Last login: Thu Apr 12 13:04:35 2012 from akpm.corp.google.com
> > > Connection to akpm2 closed.
> > > 
> > > I took a peek in the `strace ssh' output.
> > > 
> > > Good:
> > > 
> > > 17815 write(5, "Last login: Thu Apr 12 13:27:23 "..., 65) = 65
> > > 17815 select(7, [3 4], [], NULL, {120, 0}) = 1 (in [3], left {119, 770798})
> > > 17815 read(3, "\21O\200\366Mv\343\222\332\251\2403L\376Y18\2047\336\244\226p-+X\2%\2119\314\255"..., 8192) = 80
> > > 17815 select(7, [3 4], [5], NULL, {120, 0}) = 1 (out [5], left {119, 999987})
> > > 17815 write(5, "\r\33[m\17\33[27m\33[24m\33[Jakpm2:/home/ak"..., 39) = 39
> > > 17815 select(7, [3 4], [], NULL, {120, 0}) = 1 (in [4], left {118, 801111})
> > > 17815 read(4, "\4", 16384)              = 1
> > > 17815 select(7, [3 4], [3], NULL, {120, 0}) = 1 (out [3], left {119, 999991})
> > > 17815 write(3, "\235J\5\340\234\21\266\207\26e\362\327\2\332\1\267\272\200\364\267?/\320L\341\35\350{+M:\222"..., 48) = 48
> > > 
> > > 
> > > Bad:
> > > 
> > > 9305  write(5, "Last login: Thu Apr 12 13:02:54 "..., 65) = 65
> > > 9305  select(7, [3 4], [], NULL, {120, 0}) = 1 (in [3], left {119, 945541})
> > > 9305  read(3, "f\357\250~\260i\2259\320\3258\262)O\364;_\251\360-\314\31\374]\326\300\356\364\370S\3105"..., 8192) = 128
> > > 9305  close(5)                          = 0
> > > 9305  close(4)                          = 0
> > > 
> > > That read() is returning a lot more data.
> > > 
> > > It appears that we've done something which breaks X forwarding.  I
> > > won't be able to look any further into this until Monday.
> > 
> > This regression is now in mainline.  I've bisected it to an SELinux
> > patch, below.  I have confirmed that reverting just that patch from
> > current mainline fixes the regression.
> > 
> > Using openssh-server-4.3p2-14.fc6 on FC6.
> > 
> > 
> > commit 95dbf739313f09c8d859bde1373bc264ef979337
> > Author:     Eric Paris <eparis@redhat.com>
> > AuthorDate: Wed Apr 4 13:45:34 2012 -0400
> > Commit:     Eric Paris <eparis@redhat.com>
> > CommitDate: Mon Apr 9 12:22:49 2012 -0400
> > 
> >     SELinux: check OPEN on truncate calls
> >     
> >     In RH BZ 578841 we realized that the SELinux sandbox program was allowed to
> >     truncate files outside of the sandbox.  The reason is because sandbox
> >     confinement is determined almost entirely by the 'open' permission.  The idea
> >     was that if the sandbox was unable to open() files it would be unable to do
> >     harm to those files.  This turns out to be false in light of syscalls like
> >     truncate() and chmod() which don't require a previous open() call.  I looked
> >     at the syscalls that did not have an associated 'open' check and found that
> >     truncate(), did not have a separate permission and even if it did have a
> >     separate permission such a permission would be inadequate for use by
> >     sandbox (since it would have to be granted so liberally as to be useless).
> >     This patch checks the OPEN permission on truncate.  I think a better solution
> >     for sandbox is a whole new permission, but at least this fixes what we have
> >     today.
> >     
> >     Signed-off-by: Eric Paris <eparis@redhat.com>
> > 
> > diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> > index d85b793..f7d7e77 100644
> > --- a/security/selinux/hooks.c
> > +++ b/security/selinux/hooks.c
> > @@ -2708,6 +2708,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
> >  {
> >  	const struct cred *cred = current_cred();
> >  	unsigned int ia_valid = iattr->ia_valid;
> > +	__u32 av = FILE__WRITE;
> >  
> >  	/* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
> >  	if (ia_valid & ATTR_FORCE) {
> > @@ -2721,7 +2722,10 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
> >  			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
> >  		return dentry_has_perm(cred, dentry, FILE__SETATTR);
> >  
> > -	return dentry_has_perm(cred, dentry, FILE__WRITE);
> > +	if (ia_valid & ATTR_SIZE)
> > +		av |= FILE__OPEN;
> > +
> > +	return dentry_has_perm(cred, dentry, av);
> >  }
> >  
> >  static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
> 
> Very odd indeed...  I can only assume you are SELinux enforcing and have
> a denial every time this fails.  Can you send me that denial?
> 
> I really do not want to revert this and feel that the only right fix is
> going to be to update your selinux policy to allow this new check.  I'd
> rather not allow (whatever program) to truncate() files willy-nilly (in
> violation of the intentions of selinux policy)
> 
> I'm sorry I never saw it sooner.  We've had it in RHEL for even longer
> than the 3 months it's been in -next.  I think the 'right' fix is going
> to have to be an update to SELinux policy (for your long dead system, if
> you give me the denial I can build you a new policy) rather than leaving
> the potential security hole in mainline...

Andrew sent me his audit log and it didn't show anything.  But it got me
thinking.  Now I think this actually is a code bug.  Andrew, can you
test this?

diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 2e7bd67..20a4315 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -2758,7 +2758,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
 			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
 		return dentry_has_perm(cred, dentry, FILE__SETATTR);
 
-	if (ia_valid & ATTR_SIZE)
+	if ((ia_valid & ATTR_SIZE) && selinux_policycap_openperm)
 		av |= FILE__OPEN;
 
 	return dentry_has_perm(cred, dentry, av);

^ permalink raw reply related

* [PATCH net-next 2/2] inetpeer: add parameter net for inet_getpeer_v4, v6
From: Gao feng @ 2012-06-05  2:23 UTC (permalink / raw)
  To: herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q,
	steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA
In-Reply-To: <1338863012-4902-1-git-send-email-gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>

add struct net as a parameter of inet_getpeer_v[4,6],
use net to replace &init_net.

and modify some places to provide net for inet_getpeer_v[4,6]

Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 include/net/inetpeer.h   |   12 ++++++++----
 net/ipv4/inet_fragment.c |    2 +-
 net/ipv4/ip_fragment.c   |    6 +++++-
 net/ipv4/route.c         |    8 +++++---
 net/ipv4/tcp_ipv4.c      |    6 ++++--
 net/ipv6/route.c         |    3 ++-
 net/ipv6/tcp_ipv6.c      |    6 ++++--
 7 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 4a50449..31101e0 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -76,22 +76,26 @@ struct inet_peer *inet_getpeer(struct net *net,
 			       const struct inetpeer_addr *daddr,
 			       int create);
 
-static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
+static inline struct inet_peer *inet_getpeer_v4(struct net *net,
+						__be32 v4daddr,
+						int create)
 {
 	struct inetpeer_addr daddr;
 
 	daddr.addr.a4 = v4daddr;
 	daddr.family = AF_INET;
-	return inet_getpeer(&init_net, &daddr, create);
+	return inet_getpeer(net, &daddr, create);
 }
 
-static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
+static inline struct inet_peer *inet_getpeer_v6(struct net *net,
+						const struct in6_addr *v6daddr,
+						int create)
 {
 	struct inetpeer_addr daddr;
 
 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(&init_net, &daddr, create);
+	return inet_getpeer(net, &daddr, create);
 }
 
 /* can be called from BH context or outside */
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 5ff2a51..85190e6 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -243,12 +243,12 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
 	if (q == NULL)
 		return NULL;
 
+	q->net = nf;
 	f->constructor(q, arg);
 	atomic_add(f->qsize, &nf->mem);
 	setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
 	spin_lock_init(&q->lock);
 	atomic_set(&q->refcnt, 1);
-	q->net = nf;
 
 	return q;
 }
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9dbd3dd..22c6bab 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -171,6 +171,10 @@ static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
 static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 {
 	struct ipq *qp = container_of(q, struct ipq, q);
+	struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
+					       frags);
+	struct net *net = container_of(ipv4, struct net, ipv4);
+
 	struct ip4_create_arg *arg = a;
 
 	qp->protocol = arg->iph->protocol;
@@ -180,7 +184,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, void *a)
 	qp->daddr = arg->iph->daddr;
 	qp->user = arg->user;
 	qp->peer = sysctl_ipfrag_max_dist ?
-		inet_getpeer_v4(arg->iph->saddr, 1) : NULL;
+		inet_getpeer_v4(net, arg->iph->saddr, 1) : NULL;
 }
 
 static __inline__ void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index e5b18b8..448e56b 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1328,9 +1328,10 @@ static u32 rt_peer_genid(void)
 
 void rt_bind_peer(struct rtable *rt, __be32 daddr, int create)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(daddr, create);
+	peer = inet_getpeer_v4(net, daddr, create);
 
 	if (peer && cmpxchg(&rt->peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
@@ -1694,7 +1695,7 @@ unsigned short ip_rt_frag_needed(struct net *net, const struct iphdr *iph,
 	unsigned short est_mtu = 0;
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v4(iph->daddr, 1);
+	peer = inet_getpeer_v4(net, iph->daddr, 1);
 	if (peer) {
 		unsigned short mtu = new_mtu;
 
@@ -1935,6 +1936,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
 static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 			    struct fib_info *fi)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 	int create = 0;
 
@@ -1944,7 +1946,7 @@ static void rt_init_metrics(struct rtable *rt, const struct flowi4 *fl4,
 	if (fl4 && (fl4->flowi4_flags & FLOWI_FLAG_PRECOW_METRICS))
 		create = 1;
 
-	rt->peer = peer = inet_getpeer_v4(rt->rt_dst, create);
+	rt->peer = peer = inet_getpeer_v4(net, rt->rt_dst, create);
 	if (peer) {
 		rt->rt_peer_genid = rt_peer_genid();
 		if (inet_metrics_new(peer))
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a43b87d..50d4bee 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1822,11 +1822,12 @@ struct inet_peer *tcp_v4_get_peer(struct sock *sk, bool *release_it)
 {
 	struct rtable *rt = (struct rtable *) __sk_dst_get(sk);
 	struct inet_sock *inet = inet_sk(sk);
+	struct net *net = sock_net(sk);
 	struct inet_peer *peer;
 
 	if (!rt ||
 	    inet->cork.fl.u.ip4.daddr != inet->inet_daddr) {
-		peer = inet_getpeer_v4(inet->inet_daddr, 1);
+		peer = inet_getpeer_v4(net, inet->inet_daddr, 1);
 		*release_it = true;
 	} else {
 		if (!rt->peer)
@@ -1842,8 +1843,9 @@ EXPORT_SYMBOL(tcp_v4_get_peer);
 void *tcp_v4_tw_get_peer(struct sock *sk)
 {
 	const struct inet_timewait_sock *tw = inet_twsk(sk);
+	struct net *net = sock_net(sk);
 
-	return inet_getpeer_v4(tw->tw_daddr, 1);
+	return inet_getpeer_v4(net, tw->tw_daddr, 1);
 }
 EXPORT_SYMBOL(tcp_v4_tw_get_peer);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 999a982..4eca013 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -306,9 +306,10 @@ static u32 rt6_peer_genid(void)
 
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
+	struct net *net = dev_net(rt->dst.dev);
 	struct inet_peer *peer;
 
-	peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
+	peer = inet_getpeer_v6(net, &rt->rt6i_dst.addr, create);
 	if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
 		inet_putpeer(peer);
 	else
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 554d599..56aae14 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1734,11 +1734,12 @@ static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
 {
 	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct net *net = sock_net(sk);
 	struct inet_peer *peer;
 
 	if (!rt ||
 	    !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
-		peer = inet_getpeer_v6(&np->daddr, 1);
+		peer = inet_getpeer_v6(net, &np->daddr, 1);
 		*release_it = true;
 	} else {
 		if (!rt->rt6i_peer)
@@ -1754,11 +1755,12 @@ static void *tcp_v6_tw_get_peer(struct sock *sk)
 {
 	const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 	const struct inet_timewait_sock *tw = inet_twsk(sk);
+	struct net *net = sock_net(sk);
 
 	if (tw->tw_family == AF_INET)
 		return tcp_v4_tw_get_peer(sk);
 
-	return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
+	return inet_getpeer_v6(net, &tw6->tw_v6_daddr, 1);
 }
 
 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
-- 
1.7.7.6

^ permalink raw reply related

* [PATCH net-next 1/2] inetpeer: add namespace support for inetpeer
From: Gao feng @ 2012-06-05  2:23 UTC (permalink / raw)
  To: herbert-lOAM2aK0SrRLBo1qDEOMRrpzq4S04n8Q,
	steffen.klassert-opNxpl+3fjRBDgjK7y7TUQ,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	containers-cunTk1MwBs9QetFLy7KEm3xJsTq8ys+cHZ5vskTnxNA

Currently inetpeer doesn't support namespaces, so the information
leaks across namespaces.

This patch moves the global vars v4_peers and v6_peers into
netns_ipv4 and netns_ipv6 as a new field, peers.

Add struct pernet_operations inetpeer_ops to initialize the pernet
inetpeer data.

and change family_to_base and inet_getpeer to support namespace.

Signed-off-by: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
---
 include/net/inetpeer.h   |   10 +++++---
 include/net/netns/ipv4.h |    1 +
 include/net/netns/ipv6.h |    1 +
 net/ipv4/inetpeer.c      |   54 +++++++++++++++++++++++++++++++++++++++------
 net/ipv4/route.c         |    2 +-
 5 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index b94765e..4a50449 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -72,7 +72,9 @@ static inline bool inet_metrics_new(const struct inet_peer *p)
 }
 
 /* can be called with or without local BH being disabled */
-struct inet_peer	*inet_getpeer(const struct inetpeer_addr *daddr, int create);
+struct inet_peer *inet_getpeer(struct net *net,
+			       const struct inetpeer_addr *daddr,
+			       int create);
 
 static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 {
@@ -80,7 +82,7 @@ static inline struct inet_peer *inet_getpeer_v4(__be32 v4daddr, int create)
 
 	daddr.addr.a4 = v4daddr;
 	daddr.family = AF_INET;
-	return inet_getpeer(&daddr, create);
+	return inet_getpeer(&init_net, &daddr, create);
 }
 
 static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr, int create)
@@ -89,14 +91,14 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 
 	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
-	return inet_getpeer(&daddr, create);
+	return inet_getpeer(&init_net, &daddr, create);
 }
 
 /* can be called from BH context or outside */
 extern void inet_putpeer(struct inet_peer *p);
 extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout);
 
-extern void inetpeer_invalidate_tree(int family);
+extern void inetpeer_invalidate_tree(struct net *net, int family);
 
 /*
  * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index bbd023a..0855e09 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -31,6 +31,7 @@ struct netns_ipv4 {
 	struct sock		**icmp_sk;
 	struct sock		*tcp_sock;
 
+	struct inet_peer_base	*peers;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*iptable_filter;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index b42be53..df0a545 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -33,6 +33,7 @@ struct netns_ipv6 {
 	struct netns_sysctl_ipv6 sysctl;
 	struct ipv6_devconf	*devconf_all;
 	struct ipv6_devconf	*devconf_dflt;
+	struct inet_peer_base	*peers;
 	struct netns_frags	frags;
 #ifdef CONFIG_NETFILTER
 	struct xt_table		*ip6table_filter;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index d4d61b6..dafb8b0 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -90,13 +90,11 @@ struct inet_peer_base {
 
 static struct inet_peer_base v4_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v4_peers.lock),
 	.total		= 0,
 };
 
 static struct inet_peer_base v6_peers = {
 	.root		= peer_avl_empty_rcu,
-	.lock		= __SEQLOCK_UNLOCKED(v6_peers.lock),
 	.total		= 0,
 };
 
@@ -153,6 +151,41 @@ static void inetpeer_gc_worker(struct work_struct *work)
 	schedule_delayed_work(&gc_work, gc_delay);
 }
 
+static int __net_init inetpeer_net_init(struct net *net)
+{
+
+	net->ipv4.peers = kmemdup(&v4_peers,
+				  sizeof(v4_peers),
+				  GFP_KERNEL);
+	if (net->ipv4.peers == NULL)
+		return -1;
+	seqlock_init(&net->ipv4.peers->lock);
+	net->ipv6.peers = kmemdup(&v6_peers,
+				  sizeof(v6_peers),
+				  GFP_KERNEL);
+	if (net->ipv6.peers == NULL)
+		goto out_ipv6;
+	seqlock_init(&net->ipv6.peers->lock);
+	return 0;
+out_ipv6:
+	kfree(net->ipv4.peers);
+	return -1;
+}
+
+static void __net_exit inetpeer_net_exit(struct net *net)
+{
+	inetpeer_invalidate_tree(net, AF_INET);
+	kfree(net->ipv4.peers);
+
+	inetpeer_invalidate_tree(net, AF_INET6);
+	kfree(net->ipv6.peers);
+}
+
+static struct pernet_operations inetpeer_ops = {
+	.init = inetpeer_net_init,
+	.exit = inetpeer_net_exit,
+};
+
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
@@ -177,6 +210,7 @@ void __init inet_initpeers(void)
 			NULL);
 
 	INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker);
+	register_pernet_subsys(&inetpeer_ops);
 }
 
 static int addr_compare(const struct inetpeer_addr *a,
@@ -401,9 +435,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base,
 	call_rcu(&p->rcu, inetpeer_free_rcu);
 }
 
-static struct inet_peer_base *family_to_base(int family)
+static struct inet_peer_base *family_to_base(struct net *net,
+					     int family)
 {
-	return family == AF_INET ? &v4_peers : &v6_peers;
+	return family == AF_INET ? net->ipv4.peers : net->ipv6.peers;
 }
 
 /* perform garbage collect on all items stacked during a lookup */
@@ -443,10 +478,12 @@ static int inet_peer_gc(struct inet_peer_base *base,
 	return cnt;
 }
 
-struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create)
+struct inet_peer *inet_getpeer(struct net *net,
+			       const struct inetpeer_addr *daddr,
+			       int create)
 {
 	struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr;
-	struct inet_peer_base *base = family_to_base(daddr->family);
+	struct inet_peer_base *base = family_to_base(net, daddr->family);
 	struct inet_peer *p;
 	unsigned int sequence;
 	int invalidated, gccnt = 0;
@@ -560,10 +597,10 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout)
 }
 EXPORT_SYMBOL(inet_peer_xrlim_allow);
 
-void inetpeer_invalidate_tree(int family)
+void inetpeer_invalidate_tree(struct net *net, int family)
 {
 	struct inet_peer *old, *new, *prev;
-	struct inet_peer_base *base = family_to_base(family);
+	struct inet_peer_base *base = family_to_base(net, family);
 
 	write_seqlock_bh(&base->lock);
 
@@ -586,3 +623,3 @@ out:
 	write_sequnlock_bh(&base->lock);
 }
 EXPORT_SYMBOL(inetpeer_invalidate_tree);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index ffcb3b0..e5b18b8 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -938,7 +938,7 @@ static void rt_cache_invalidate(struct net *net)
 
 	get_random_bytes(&shuffle, sizeof(shuffle));
 	atomic_add(shuffle + 1U, &net->ipv4.rt_genid);
-	inetpeer_invalidate_tree(AF_INET);
+	inetpeer_invalidate_tree(net, AF_INET);
 }
 
 /*
-- 
1.7.7.6

^ permalink raw reply related

* Re: [PATCH 3/3] Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value.
From: Jerry Chu @ 2012-06-04 23:57 UTC (permalink / raw)
  To: Ilpo Järvinen; +Cc: Damian Lukowski, Netdev, David Miller
In-Reply-To: <alpine.DEB.2.00.1206042116560.17280@melkinpaasi.cs.helsinki.fi>

On Mon, Jun 4, 2012 at 12:18 PM, Ilpo Järvinen
<ilpo.jarvinen@helsinki.fi> wrote:
> On Mon, 4 Jun 2012, Damian Lukowski wrote:
>
>> please verify, I understood you correctly.
>>
>> You have set TCP_RTO_MIN to a lower value, e.g. 0.002 seconds to improve
>> your internal low-latency traffic. Because of the improvement, R1
>> timeouts are triggered too fast for external high-RTT traffic. Is that
>> correct?
>> If so, may I suggest to set tcp_retries1 to a higher value? For
>> TCP_RTO_MIN == 0.002 and tcp_retries1 ==  10, R1 will be calculated to
>> approximately 4 seconds.
>>
>> Is that ok?
>
> I suppose what he meant is that you could have e.g., 60sec RTT and with
> small enough retries the timeout calculation yields to some timeout
> smaller than 60 secs, and therefore no retransmissions are made which is
> certainly not a desirable property? ...This is valid issue even if no min
> rto tweaking was done but can of course get much worse if min rto is
> shorter.

The extreme case you described above won't happen because there is a
point check at the beginning to return false if inet_csk(sk)->icsk_retransmits
is zero. But that seems to be a hack because why is 0 so special, not 1, 2,...?

>
> I agree with his proposed solution:
>
>> > I think retransmits_timed_out() should check against both time
>> > duration and retrans count (icsk_retransmits).
>
> ...that is, use both pseudo timeout check of the current code and the
> previously used icsk_retransmits compare at the same time.

Yep, it's not an ideal solution, i.e., the problem the original patch
tried to address
may continue to exist if the gap between TCP_RTO_MIN used as an estimator
and the real RTO is large, but it at least addresses the problem we
have locally.

I will submit a patch for this asap.

Thanks,

Jerry

>
> --
>  i.

^ permalink raw reply

* Re: [PATCH 3/3] Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value.
From: Jerry Chu @ 2012-06-04 23:50 UTC (permalink / raw)
  To: Damian Lukowski; +Cc: Netdev, David Miller, Ilpo Järvinen
In-Reply-To: <1338832247.5299.4.camel@nexus>

Hi Damian,

On Mon, Jun 4, 2012 at 10:50 AM, Damian Lukowski
<damian@tvk.rwth-aachen.de> wrote:
> Hi Jerry,
>
> please verify, I understood you correctly.
>
> You have set TCP_RTO_MIN to a lower value, e.g. 0.002 seconds to improve
> your internal low-latency traffic. Because of the improvement, R1
> timeouts are triggered too fast for external high-RTT traffic. Is that
> correct?

Correct.

> If so, may I suggest to set tcp_retries1 to a higher value? For
> TCP_RTO_MIN == 0.002 and tcp_retries1 ==  10, R1 will be calculated to
> approximately 4 seconds.

I think hacking tcp_retries1 is the wrong solution. E.g., 10 retries may be too
generous for those short RTT flows.

I think the fundamental problem is - the ideal fix for your original RTO revert
problem should've used the per-flow RTO to compute R1 & R2. But that
computation may be too expensive so you used TCP_RTO_MIN as an
approximation - not a good idea IMHO!

The easiest solution I can see so far is to replace the check

if (!inet_csk(sk)->icsk_retransmits)
                return false;

at the beginning of retransmits_timed_out() with

if (inet_csk(sk)->icsk_retransmits < boundary)
                return false;

Best,

Jerry

>
> Is that ok?
>
> Best regards
>  Damian
>
> Am Freitag, den 01.06.2012, 15:58 -0700 schrieb Jerry Chu:
>> > From: Damian Lukowski <damian@tvk.rwth-aachen.de>
>> > Date: Wed, Aug 26, 2009 at 3:16 AM
>> > Subject: [PATCH 3/3] Revert Backoff [v3]: Calculate TCP's connection close
>> > threshold as a time value.
>> > To: Netdev <netdev@vger.kernel.org>
>> >
>> >
>> > RFC 1122 specifies two threshold values R1 and R2 for connection timeouts,
>> > which may represent a number of allowed retransmissions or a timeout value.
>> > Currently linux uses sysctl_tcp_retries{1,2} to specify the thresholds
>> > in number of allowed retransmissions.
>> >
>> > For any desired threshold R2 (by means of time) one can specify tcp_retries2
>> > (by means of number of retransmissions) such that TCP will not time out
>> > earlier than R2. This is the case, because the RTO schedule follows a fixed
>> > pattern, namely exponential backoff.
>> >
>> > However, the RTO behaviour is not predictable any more if RTO backoffs can
>> > be
>> > reverted, as it is the case in the draft
>> > "Make TCP more Robust to Long Connectivity Disruptions"
>> > (http://tools.ietf.org/html/draft-zimmermann-tcp-lcd).
>> >
>> > In the worst case TCP would time out a connection after 3.2 seconds, if the
>> > initial RTO equaled MIN_RTO and each backoff has been reverted.
>> >
>> > This patch introduces a function retransmits_timed_out(N),
>> > which calculates the timeout of a TCP connection, assuming an initial
>> > RTO of MIN_RTO and N unsuccessful, exponentially backed-off retransmissions.
>> >
>> > Whenever timeout decisions are made by comparing the retransmission counter
>> > to some value N, this function can be used, instead.
>> >
>> > The meaning of tcp_retries2 will be changed, as many more RTO
>> > retransmissions
>> > can occur than the value indicates. However, it yields a timeout which is
>> > similar to the one of an unpatched, exponentially backing off TCP in the
>> > same
>> > scenario. As no application could rely on an RTO greater than MIN_RTO, there
>> > should be no risk of a regression.
>>
>> This looks like a typical "fix one problem, introducing a few more" patch :(.
>> What do you mean by "no application could rely on an RTO greater than
>> MIN_RTO..."
>> above? How can you make the assumption that RTO is not too far off
>> from TCP_RTO_MIN?
>>
>> While you tried to address a problem where the retransmission count
>> was high but the actual
>> timeout duration was too short, have you considered the other case
>> around, i.e., the timeout
>> duration is long but the retransmission count is too short? This is
>> exactly what's happening
>> to us with your patch. We've much reduced TCP_RTO_MIN for our internal
>> traffic, but not
>> noticing your change has severely shortened the R1 & R2 recommended by
>> RFC1122 for our
>> long haul traffic until now. In many cases R1 threshold was met upon
>> the first retrans timeout.
>>
>> I think retransmits_timed_out() should check against both time
>> duration and retrans count
>> (icsk_retransmits).
>>
>> Thought?
>>
>> Jerry
>>
>> >
>> > Signed-off-by: Damian Lukowski <damian@tvk.rwth-aachen.de>
>> > ---
>> >  include/net/tcp.h    |   18 ++++++++++++++++++
>> >  net/ipv4/tcp_timer.c |   11 +++++++----
>> >  2 files changed, 25 insertions(+), 4 deletions(-)
>> >
>> > diff --git a/include/net/tcp.h b/include/net/tcp.h
>> > index c35b329..17d1a88 100644
>> > --- a/include/net/tcp.h
>> > +++ b/include/net/tcp.h
>> > @@ -1247,6 +1247,24 @@ static inline struct sk_buff
>> > *tcp_write_queue_prev(struct sock *sk, struct sk_bu
>> >  #define tcp_for_write_queue_from_safe(skb, tmp, sk)                    \
>> >        skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
>> >
>> > +static inline bool retransmits_timed_out(const struct sock *sk,
>> > +                                        unsigned int boundary)
>> > +{
>> > +       int limit, K;
>> > +       if (!inet_csk(sk)->icsk_retransmits)
>> > +               return false;
>> > +
>> > +       K = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
>> > +
>> > +       if (boundary <= K)
>> > +               limit = ((2 << boundary) - 1) * TCP_RTO_MIN;
>> > +       else
>> > +               limit = ((2 << K) - 1) * TCP_RTO_MIN +
>> > +                       (boundary - K) * TCP_RTO_MAX;
>> > +
>> > +       return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= limit;
>> > +}
>> > +
>> >  static inline struct sk_buff *tcp_send_head(struct sock *sk)
>> >  {
>> >        return sk->sk_send_head;
>> > diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
>> > index a3ba494..2972d7b 100644
>> > --- a/net/ipv4/tcp_timer.c
>> > +++ b/net/ipv4/tcp_timer.c
>> > @@ -137,13 +137,14 @@ static int tcp_write_timeout(struct sock *sk)
>> >  {
>> >        struct inet_connection_sock *icsk = inet_csk(sk);
>> >        int retry_until;
>> > +       bool do_reset;
>> >
>> >        if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
>> >                if (icsk->icsk_retransmits)
>> >                        dst_negative_advice(&sk->sk_dst_cache);
>> >                retry_until = icsk->icsk_syn_retries ? :
>> > sysctl_tcp_syn_retries;
>> >        } else {
>> > -               if (icsk->icsk_retransmits >= sysctl_tcp_retries1) {
>> > +               if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
>> >                        /* Black hole detection */
>> >                        tcp_mtu_probing(icsk, sk);
>> >
>> > @@ -155,13 +156,15 @@ static int tcp_write_timeout(struct sock *sk)
>> >                        const int alive = (icsk->icsk_rto < TCP_RTO_MAX);
>> >
>> >                        retry_until = tcp_orphan_retries(sk, alive);
>> > +                       do_reset = alive ||
>> > +                                  !retransmits_timed_out(sk, retry_until);
>> >
>> > -                       if (tcp_out_of_resources(sk, alive ||
>> > icsk->icsk_retransmits < retry_until))
>> > +                       if (tcp_out_of_resources(sk, do_reset))
>> >                                return 1;
>> >                }
>> >        }
>> >
>> > -       if (icsk->icsk_retransmits >= retry_until) {
>> > +       if (retransmits_timed_out(sk, retry_until)) {
>> >                /* Has it gone just too far? */
>> >                tcp_write_err(sk);
>> >                return 1;
>> > @@ -385,7 +388,7 @@ void tcp_retransmit_timer(struct sock *sk)
>> >  out_reset_timer:
>> >        icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
>> >        inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
>> > TCP_RTO_MAX);
>> > -       if (icsk->icsk_retransmits > sysctl_tcp_retries1)
>> > +       if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
>> >                __sk_dst_reset(sk);
>> >
>> >  out:;
>> > --
>> > 1.6.3.3
>> >
>> > --
>> > To unsubscribe from this list: send the line "unsubscribe netdev" in
>> > the body of a message to majordomo@vger.kernel.org
>> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> >
>
>

^ permalink raw reply

* Re: Reoccuring kern.log events after running xl2tp with ethernet adapter Realtek 8111E
From: Francois Romieu @ 2012-06-04 23:10 UTC (permalink / raw)
  To: Dustin Schumm; +Cc: netdev
In-Reply-To: <CANcVnzi2mXKXZLcaAt9rJ9OOdK_2=V8JV9uebVeBcsDu6j+DhQ@mail.gmail.com>

Dustin Schumm <shodid@gmail.com> :
[...]
> I'm more than happy to provide whatever more is needed.

It does not look like our usual 816x bug reports... Neither does your
setup actually.

Which kind of 816x is it (see 'dmesg | grep XID' output) ?

-- 
Ueimor

^ permalink raw reply

* [PATCH IPROUTE2] tc-fq_codel: Add manpage
From: Vijay Subramanian @ 2012-06-04 22:55 UTC (permalink / raw)
  To: netdev; +Cc: Stephen Hemminger, Eric Dumazet, Dave Taht, Vijay Subramanian

This patch adds the manpage for the FQ_CoDel (Fair Queuing Controlled-Delay)
AQM.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
---
 man/man8/Makefile      |    2 +-
 man/man8/tc-fq_codel.8 |  108 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+), 1 deletions(-)
 create mode 100644 man/man8/tc-fq_codel.8

diff --git a/man/man8/Makefile b/man/man8/Makefile
index 6d6242e..6793873 100644
--- a/man/man8/Makefile
+++ b/man/man8/Makefile
@@ -6,7 +6,7 @@ MAN8PAGES = $(TARGETS) ip.8 arpd.8 lnstat.8 routel.8 rtacct.8 rtmon.8 ss.8 \
 	tc-tbf.8 tc.8 rtstat.8 ctstat.8 nstat.8 routef.8 tc-codel.8 \
 	tc-sfb.8 tc-netem.8 tc-choke.8 ip-tunnel.8 ip-rule.8 ip-ntable.8 \
 	ip-monitor.8 tc-stab.8 tc-hfsc.8 ip-xfrm.8 ip-netns.8 \
-	ip-neighbour.8 ip-mroute.8 ip-maddress.8 ip-addrlabel.8
+	ip-neighbour.8 ip-mroute.8 ip-maddress.8 ip-addrlabel.8 tc-fq_codel.8
 
 
 all: $(TARGETS)
diff --git a/man/man8/tc-fq_codel.8 b/man/man8/tc-fq_codel.8
new file mode 100644
index 0000000..5c128af
--- /dev/null
+++ b/man/man8/tc-fq_codel.8
@@ -0,0 +1,108 @@
+.TH FQ_CoDel 8 "4 June 2012" "iproute2" "Linux"
+.SH NAME
+FQ_CoDel \- Fair Queuing (FQ) with Controlled Delay (CoDel)
+.SH SYNOPSIS
+.B tc qdisc ... fq_codel
+[
+.B limit
+PACKETS ] [
+.B flows
+NUMBER ] [
+.B target
+TIME ] [
+.B interval
+TIME ] [
+.B quantum
+BYTES ] [
+.B ecn
+|
+.B noecn
+]
+
+.SH DESCRIPTION
+FQ_Codel (Fair Queuing Controlled Delay) is a queuing discipline that combines Fair
+Queuing with the CoDel AQM scheme. FQ_Codel uses a stochastic model to classify
+incoming packets into different flows and is used to provide a fair share of the
+bandwidth to all the flows using the queue. Each such flow is managed by the
+CoDel queuing discipline. Reordering within a flow is avoided since Codel
+internally uses a FIFO queue.
+
+.SH PARAMETERS
+.SS limit
+has the same semantics as
+.B codel
+and is the hard limit on the real queue size.
+When this limit is reached, incoming packets are dropped. Default is 10240
+packets.
+
+.SS flows
+is the number of flows into which the incoming packets are classified. Due to
+the stochastic nature of hashing, multiple flows may end up being hashed into
+the same slot. Newer flows have priority over older ones. This parameter can be
+set only at load time since memory has to be allocated for the hash table.
+Default value is 1024.
+
+.SS target
+has the same semantics as
+.B codel
+and is the acceptable minimum
+standing/persistent queue delay. This minimum delay is identified by tracking
+the local minimum queue delay that packets experience.  Default value is 5ms.
+
+.SS interval
+has the same semantics as
+.B codel
+and is used to ensure that the measured minimum delay does not become too stale.
+The minimum delay must be experienced in the last epoch of length \fBinterval\fR.
+It should be set on the order of the worst-case RTT through the bottleneck to
+give endpoints sufficient time to react.  Default value is 100ms.
+
+.SS quantum
+is the number of bytes used as 'deficit' in the fair queuing algorithm. Default
+is set to 1514 bytes which corresponds to the Ethernet MTU plus the hardware
+header length of 14 bytes.
+
+.SS ecn | noecn
+has the same semantics as
+.B codel
+and can be used to mark packets instead of dropping them.  If
+.B ecn
+has been enabled,
+.B noecn
+can be used to turn it off and vice versa. Unlike
+.BR codel ,
+ecn is turned on by default.
+
+.SH EXAMPLES
+#tc qdisc add   dev eth0 root fq_codel
+.br
+#tc -s qdisc show
+.br
+qdisc fq_codel 8002: dev eth0 root refcnt 2 limit 10240p flows 1024 quantum 1514
+ target 5.0ms interval 100.0ms ecn
+   Sent 428514 bytes 2269 pkt (dropped 0, overlimits 0 requeues 0)
+   backlog 0b 0p requeues 0
+    maxpacket 256 drop_overlimit 0 new_flow_count 0 ecn_mark 0
+    new_flows_len 0 old_flows_len 0
+
+#tc qdisc add dev eth0 root fq_codel limit 2000 target 3ms interval 40ms noecn
+.br
+#tc -s qdisc show
+.br
+qdisc fq_codel 8003: dev eth0 root refcnt 2 limit 2000p flows 1024 quantum 1514
+target 3.0ms interval 40.0ms
+ Sent 2588985006 bytes 1783629 pkt (dropped 0, overlimits 0 requeues 34869)
+ backlog 0b 0p requeues 34869
+  maxpacket 65226 drop_overlimit 0 new_flow_count 73 ecn_mark 0
+  new_flows_len 1 old_flows_len 3
+
+
+.SH SEE ALSO
+.BR tc (8),
+.BR tc-codel (8),
+.BR tc-red (8)
+
+.SH AUTHORS
+FQ_CoDel was implemented by Eric Dumazet. This manpage was written
+by Vijay Subramanian. Please report corrections to the Linux Networking
+mailing list <netdev@vger.kernel.org>.
-- 
1.7.0.4

^ permalink raw reply related

* Re: linux-next: Tree for Apr 12
From: Eric Paris @ 2012-06-04 22:46 UTC (permalink / raw)
  To: Andrew Morton
  Cc: Stephen Rothwell, linux-next, LKML, netdev, James Morris,
	Stephen Smalley
In-Reply-To: <20120604150455.7a80b311.akpm@linux-foundation.org>

On Mon, 2012-06-04 at 15:04 -0700, Andrew Morton wrote:
> On Thu, 12 Apr 2012 14:24:15 -0700
> Andrew Morton <akpm@linux-foundation.org> wrote:
> 
> > On Thu, 12 Apr 2012 14:59:31 +1000
> > Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> > 
> > > I have created today's linux-next tree at
> > > git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
> > 
> > This isn't working for me.  Some time between April 3 and April 12
> > someone merged something into the non-mm part of linux-next which broke
> > ssh.
> > 
> > I boot the box and everything seems to come up OK, but attempts to ssh
> > into the machine fail with
> > 
> > X11 forwarding request failed on channel 0
> > Last login: Thu Apr 12 13:04:35 2012 from akpm.corp.google.com
> > Connection to akpm2 closed.
> > 
> > I took a peek in the `strace ssh' output.
> > 
> > Good:
> > 
> > 17815 write(5, "Last login: Thu Apr 12 13:27:23 "..., 65) = 65
> > 17815 select(7, [3 4], [], NULL, {120, 0}) = 1 (in [3], left {119, 770798})
> > 17815 read(3, "\21O\200\366Mv\343\222\332\251\2403L\376Y18\2047\336\244\226p-+X\2%\2119\314\255"..., 8192) = 80
> > 17815 select(7, [3 4], [5], NULL, {120, 0}) = 1 (out [5], left {119, 999987})
> > 17815 write(5, "\r\33[m\17\33[27m\33[24m\33[Jakpm2:/home/ak"..., 39) = 39
> > 17815 select(7, [3 4], [], NULL, {120, 0}) = 1 (in [4], left {118, 801111})
> > 17815 read(4, "\4", 16384)              = 1
> > 17815 select(7, [3 4], [3], NULL, {120, 0}) = 1 (out [3], left {119, 999991})
> > 17815 write(3, "\235J\5\340\234\21\266\207\26e\362\327\2\332\1\267\272\200\364\267?/\320L\341\35\350{+M:\222"..., 48) = 48
> > 
> > 
> > Bad:
> > 
> > 9305  write(5, "Last login: Thu Apr 12 13:02:54 "..., 65) = 65
> > 9305  select(7, [3 4], [], NULL, {120, 0}) = 1 (in [3], left {119, 945541})
> > 9305  read(3, "f\357\250~\260i\2259\320\3258\262)O\364;_\251\360-\314\31\374]\326\300\356\364\370S\3105"..., 8192) = 128
> > 9305  close(5)                          = 0
> > 9305  close(4)                          = 0
> > 
> > That read() is returning a lot more data.
> > 
> > It appears that we've done something which breaks X forwarding.  I
> > won't be able to look any further into this until Monday.
> 
> This regression is now in mainline.  I've bisected it to an SELinux
> patch, below.  I have confirmed that reverting just that patch from
> current mainline fixes the regression.
> 
> Using openssh-server-4.3p2-14.fc6 on FC6.
> 
> 
> commit 95dbf739313f09c8d859bde1373bc264ef979337
> Author:     Eric Paris <eparis@redhat.com>
> AuthorDate: Wed Apr 4 13:45:34 2012 -0400
> Commit:     Eric Paris <eparis@redhat.com>
> CommitDate: Mon Apr 9 12:22:49 2012 -0400
> 
>     SELinux: check OPEN on truncate calls
>     
>     In RH BZ 578841 we realized that the SELinux sandbox program was allowed to
>     truncate files outside of the sandbox.  The reason is because sandbox
>     confinement is determined almost entirely by the 'open' permission.  The idea
>     was that if the sandbox was unable to open() files it would be unable to do
>     harm to those files.  This turns out to be false in light of syscalls like
>     truncate() and chmod() which don't require a previous open() call.  I looked
>     at the syscalls that did not have an associated 'open' check and found that
>     truncate() did not have a separate permission and even if it did have a
>     separate permission such a permission would be inadequate for use by
>     sandbox (since it would have to be granted so liberally as to be useless).
>     This patch checks the OPEN permission on truncate.  I think a better solution
>     for sandbox is a whole new permission, but at least this fixes what we have
>     today.
>     
>     Signed-off-by: Eric Paris <eparis@redhat.com>
> 
> diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
> index d85b793..f7d7e77 100644
> --- a/security/selinux/hooks.c
> +++ b/security/selinux/hooks.c
> @@ -2708,6 +2708,7 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
>  {
>  	const struct cred *cred = current_cred();
>  	unsigned int ia_valid = iattr->ia_valid;
> +	__u32 av = FILE__WRITE;
>  
>  	/* ATTR_FORCE is just used for ATTR_KILL_S[UG]ID. */
>  	if (ia_valid & ATTR_FORCE) {
> @@ -2721,7 +2722,10 @@ static int selinux_inode_setattr(struct dentry *dentry, struct iattr *iattr)
>  			ATTR_ATIME_SET | ATTR_MTIME_SET | ATTR_TIMES_SET))
>  		return dentry_has_perm(cred, dentry, FILE__SETATTR);
>  
> -	return dentry_has_perm(cred, dentry, FILE__WRITE);
> +	if (ia_valid & ATTR_SIZE)
> +		av |= FILE__OPEN;
> +
> +	return dentry_has_perm(cred, dentry, av);
>  }
>  
>  static int selinux_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)

Very odd indeed...  I can only assume you are SELinux enforcing and have
a denial every time this fails.  Can you send me that denial?

I really do not want to revert this and feel that the only right fix is
going to be to update your selinux policy to allow this new check.  I'd
rather not allow (whatever program) to truncate() files willy-nilly (in
violation of the intentions of selinux policy)

I'm sorry I never saw it sooner.  We've had it in RHEL for even longer
than the 3 months it's been in -next.  I think the 'right' fix is going
to have to be an update to SELinux policy (for your long dead system, if
you give me the denial I can build you a new policy) rather than leaving
the potential security hole in mainline...

-Eric

^ permalink raw reply

* [PATCH net-next 3/3] drivers: net: Remove casts to same type
From: Joe Perches @ 2012-06-04 22:44 UTC (permalink / raw)
  To: David S. Miller; +Cc: netdev
In-Reply-To: <cover.1338849364.git.joe@perches.com>

Adding casts of objects to the same type is unnecessary
and confusing for a human reader.

For example, the cast below is redundant because &y already has type int *:

        int y;
        int *p = (int *)&y;

I used the coccinelle script below to find and remove these
unnecessary casts.  I manually removed the conversions this
script produces of casts with __force, __iomem and __user.

@@
type T;
T *p;
@@

-       (T *)p
+       p

Signed-off-by: Joe Perches <joe@perches.com>
---
 drivers/net/appletalk/cops.c      |    2 +-
 drivers/net/can/bfin_can.c        |    2 +-
 drivers/net/can/mcp251x.c         |    3 +--
 drivers/net/fddi/defxx.c          |    4 ++--
 drivers/net/fddi/skfp/pmf.c       |    8 ++++----
 drivers/net/hamradio/mkiss.c      |    8 ++++----
 drivers/net/hyperv/netvsc.c       |    2 +-
 drivers/net/irda/ali-ircc.c       |    6 +++---
 drivers/net/irda/au1k_ir.c        |    2 +-
 drivers/net/slip/slip.c           |    4 ++--
 drivers/net/vmxnet3/vmxnet3_drv.c |    2 +-
 drivers/net/wan/x25_asy.c         |    2 +-
 drivers/net/wimax/i2400m/fw.c     |    2 +-
 13 files changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/net/appletalk/cops.c b/drivers/net/appletalk/cops.c
index dd5e048..545c09e 100644
--- a/drivers/net/appletalk/cops.c
+++ b/drivers/net/appletalk/cops.c
@@ -936,7 +936,7 @@ static int cops_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
         struct cops_local *lp = netdev_priv(dev);
         struct sockaddr_at *sa = (struct sockaddr_at *)&ifr->ifr_addr;
-        struct atalk_addr *aa = (struct atalk_addr *)&lp->node_addr;
+        struct atalk_addr *aa = &lp->node_addr;
 
         switch(cmd)
         {
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 3f88473..ea31438 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -597,7 +597,7 @@ static int __devinit bfin_can_probe(struct platform_device *pdev)
 	dev_info(&pdev->dev,
 		"%s device registered"
 		"(&reg_base=%p, rx_irq=%d, tx_irq=%d, err_irq=%d, sclk=%d)\n",
-		DRV_NAME, (void *)priv->membase, priv->rx_irq,
+		DRV_NAME, priv->membase, priv->rx_irq,
 		priv->tx_irq, priv->err_irq, priv->can.clock.freq);
 	return 0;
 
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index 346785c..9120a36 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -1020,8 +1020,7 @@ static int __devinit mcp251x_can_probe(struct spi_device *spi)
 						      GFP_DMA);
 
 		if (priv->spi_tx_buf) {
-			priv->spi_rx_buf = (u8 *)(priv->spi_tx_buf +
-						  (PAGE_SIZE / 2));
+			priv->spi_rx_buf = (priv->spi_tx_buf + (PAGE_SIZE / 2));
 			priv->spi_rx_dma = (dma_addr_t)(priv->spi_tx_dma +
 							(PAGE_SIZE / 2));
 		} else {
diff --git a/drivers/net/fddi/defxx.c b/drivers/net/fddi/defxx.c
index 4ad80f7..6695a1d 100644
--- a/drivers/net/fddi/defxx.c
+++ b/drivers/net/fddi/defxx.c
@@ -2962,7 +2962,7 @@ static int dfx_rcv_init(DFX_board_t *bp, int get_buffers)
 			bp->descr_block_virt->rcv_data[i+j].long_0 = (u32) (PI_RCV_DESCR_M_SOP |
 				((PI_RCV_DATA_K_SIZE_MAX / PI_ALIGN_K_RCV_DATA_BUFF) << PI_RCV_DESCR_V_SEG_LEN));
 			bp->descr_block_virt->rcv_data[i+j].long_1 = (u32) (bp->rcv_block_phys + (i * PI_RCV_DATA_K_SIZE_MAX));
-			bp->p_rcv_buff_va[i+j] = (char *) (bp->rcv_block_virt + (i * PI_RCV_DATA_K_SIZE_MAX));
+			bp->p_rcv_buff_va[i+j] = (bp->rcv_block_virt + (i * PI_RCV_DATA_K_SIZE_MAX));
 			}
 #endif
 	}
@@ -3030,7 +3030,7 @@ static void dfx_rcv_queue_process(
 #ifdef DYNAMIC_BUFFERS
 		p_buff = (char *) (((struct sk_buff *)bp->p_rcv_buff_va[entry])->data);
 #else
-		p_buff = (char *) bp->p_rcv_buff_va[entry];
+		p_buff = bp->p_rcv_buff_va[entry];
 #endif
 		memcpy(&descr, p_buff + RCV_BUFF_K_DESCR, sizeof(u32));
 
diff --git a/drivers/net/fddi/skfp/pmf.c b/drivers/net/fddi/skfp/pmf.c
index 9ac4665..24d8566 100644
--- a/drivers/net/fddi/skfp/pmf.c
+++ b/drivers/net/fddi/skfp/pmf.c
@@ -1242,7 +1242,7 @@ static int smt_set_para(struct s_smc *smc, struct smt_para *pa, int index,
 			if (len < 8)
 				goto len_error ;
 			if (set)
-				memcpy((char *) to,(char *) from+2,6) ;
+				memcpy(to,from+2,6) ;
 			to += 8 ;
 			from += 8 ;
 			len -= 8 ;
@@ -1251,7 +1251,7 @@ static int smt_set_para(struct s_smc *smc, struct smt_para *pa, int index,
 			if (len < 4)
 				goto len_error ;
 			if (set)
-				memcpy((char *) to,(char *) from,4) ;
+				memcpy(to,from,4) ;
 			to += 4 ;
 			from += 4 ;
 			len -= 4 ;
@@ -1260,7 +1260,7 @@ static int smt_set_para(struct s_smc *smc, struct smt_para *pa, int index,
 			if (len < 8)
 				goto len_error ;
 			if (set)
-				memcpy((char *) to,(char *) from,8) ;
+				memcpy(to,from,8) ;
 			to += 8 ;
 			from += 8 ;
 			len -= 8 ;
@@ -1269,7 +1269,7 @@ static int smt_set_para(struct s_smc *smc, struct smt_para *pa, int index,
 			if (len < 32)
 				goto len_error ;
 			if (set)
-				memcpy((char *) to,(char *) from,32) ;
+				memcpy(to,from,32) ;
 			to += 32 ;
 			from += 32 ;
 			len -= 32 ;
diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c
index aed1a61..2c0894a 100644
--- a/drivers/net/hamradio/mkiss.c
+++ b/drivers/net/hamradio/mkiss.c
@@ -485,7 +485,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 
 			return;
 		default:
-			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+			count = kiss_esc(p, ax->xbuff, len);
 		}
 	} else {
 		unsigned short crc;
@@ -497,7 +497,7 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 		case CRC_MODE_SMACK:
 			*p |= 0x80;
 			crc = swab16(crc16(0, p, len));
-			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			count = kiss_esc_crc(p, ax->xbuff, crc, len+2);
 			break;
 		case CRC_MODE_FLEX_TEST:
 			ax->crcmode = CRC_MODE_NONE;
@@ -506,11 +506,11 @@ static void ax_encaps(struct net_device *dev, unsigned char *icp, int len)
 		case CRC_MODE_FLEX:
 			*p |= 0x20;
 			crc = calc_crc_flex(p, len);
-			count = kiss_esc_crc(p, (unsigned char *)ax->xbuff, crc, len+2);
+			count = kiss_esc_crc(p, ax->xbuff, crc, len+2);
 			break;
 
 		default:
-			count = kiss_esc(p, (unsigned char *)ax->xbuff, len);
+			count = kiss_esc(p, ax->xbuff, len);
 		}
   	}
 	spin_unlock_bh(&ax->buflock);
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 8b91947..4363c76 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -614,7 +614,7 @@ retry_send_cmplt:
 static void netvsc_receive_completion(void *context)
 {
 	struct hv_netvsc_packet *packet = context;
-	struct hv_device *device = (struct hv_device *)packet->device;
+	struct hv_device *device = packet->device;
 	struct netvsc_device *net_device;
 	u64 transaction_id = 0;
 	bool fsend_receive_comp = false;
diff --git a/drivers/net/irda/ali-ircc.c b/drivers/net/irda/ali-ircc.c
index dcc80d6..8487204 100644
--- a/drivers/net/irda/ali-ircc.c
+++ b/drivers/net/irda/ali-ircc.c
@@ -1017,7 +1017,7 @@ static void ali_ircc_fir_change_speed(struct ali_ircc_cb *priv, __u32 baud)
 {
 		
 	int iobase; 
-	struct ali_ircc_cb *self = (struct ali_ircc_cb *) priv;
+	struct ali_ircc_cb *self = priv;
 	struct net_device *dev;
 
 	IRDA_DEBUG(1, "%s(), ---------------- Start ----------------\n", __func__ );
@@ -1052,7 +1052,7 @@ static void ali_ircc_fir_change_speed(struct ali_ircc_cb *priv, __u32 baud)
  */
 static void ali_ircc_sir_change_speed(struct ali_ircc_cb *priv, __u32 speed)
 {
-	struct ali_ircc_cb *self = (struct ali_ircc_cb *) priv;
+	struct ali_ircc_cb *self = priv;
 	unsigned long flags;
 	int iobase; 
 	int fcr;    /* FIFO control reg */
@@ -1121,7 +1121,7 @@ static void ali_ircc_sir_change_speed(struct ali_ircc_cb *priv, __u32 speed)
 static void ali_ircc_change_dongle_speed(struct ali_ircc_cb *priv, int speed)
 {
 	
-	struct ali_ircc_cb *self = (struct ali_ircc_cb *) priv;
+	struct ali_ircc_cb *self = priv;
 	int iobase,dongle_id;
 	int tmp = 0;
 			
diff --git a/drivers/net/irda/au1k_ir.c b/drivers/net/irda/au1k_ir.c
index fc503aa..e09417d 100644
--- a/drivers/net/irda/au1k_ir.c
+++ b/drivers/net/irda/au1k_ir.c
@@ -794,7 +794,7 @@ static int __devinit au1k_irda_net_init(struct net_device *dev)
 
 	/* allocate the data buffers */
 	aup->db[0].vaddr =
-		(void *)dma_alloc(MAX_BUF_SIZE * 2 * NUM_IR_DESC, &temp);
+		dma_alloc(MAX_BUF_SIZE * 2 * NUM_IR_DESC, &temp);
 	if (!aup->db[0].vaddr)
 		goto out3;
 
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index d4c9db3..a34d6bf 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -390,10 +390,10 @@ static void sl_encaps(struct slip *sl, unsigned char *icp, int len)
 #endif
 #ifdef CONFIG_SLIP_MODE_SLIP6
 	if (sl->mode & SL_MODE_SLIP6)
-		count = slip_esc6(p, (unsigned char *) sl->xbuff, len);
+		count = slip_esc6(p, sl->xbuff, len);
 	else
 #endif
-		count = slip_esc(p, (unsigned char *) sl->xbuff, len);
+		count = slip_esc(p, sl->xbuff, len);
 
 	/* Order of next two lines is *very* important.
 	 * When we are sending a little amount of data,
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3f04ba0..93e0cfb 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1037,7 +1037,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 #endif
 	dev_dbg(&adapter->netdev->dev,
 		"txd[%u]: SOP 0x%Lx 0x%x 0x%x\n",
-		(u32)((union Vmxnet3_GenericDesc *)ctx.sop_txd -
+		(u32)(ctx.sop_txd -
 		tq->tx_ring.base), le64_to_cpu(gdesc->txd.addr),
 		le32_to_cpu(gdesc->dword[2]), le32_to_cpu(gdesc->dword[3]));
 
diff --git a/drivers/net/wan/x25_asy.c b/drivers/net/wan/x25_asy.c
index d7a65e1..44db8b7 100644
--- a/drivers/net/wan/x25_asy.c
+++ b/drivers/net/wan/x25_asy.c
@@ -231,7 +231,7 @@ static void x25_asy_encaps(struct x25_asy *sl, unsigned char *icp, int len)
 	}
 
 	p = icp;
-	count = x25_asy_esc(p, (unsigned char *) sl->xbuff, len);
+	count = x25_asy_esc(p, sl->xbuff, len);
 
 	/* Order of next two lines is *very* important.
 	 * When we are sending a little amount of data,
diff --git a/drivers/net/wimax/i2400m/fw.c b/drivers/net/wimax/i2400m/fw.c
index 7cbd7d2..d09e449 100644
--- a/drivers/net/wimax/i2400m/fw.c
+++ b/drivers/net/wimax/i2400m/fw.c
@@ -1268,7 +1268,7 @@ int i2400m_fw_check(struct i2400m *i2400m, const void *bcf, size_t bcf_size)
 		size_t leftover, offset, header_len, size;
 
 		leftover = top - itr;
-		offset = itr - (const void *) bcf;
+		offset = itr - bcf;
 		if (leftover <= sizeof(*bcf_hdr)) {
 			dev_err(dev, "firmware %s: %zu B left at @%zx, "
 				"not enough for BCF header\n",
-- 
1.7.8.111.gad25c.dirty

^ permalink raw reply related

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox