Netdev List
 help / color / mirror / Atom feed
* [ETH]: Combine format_addr() with print_mac().
From: Michael Chan @ 2007-12-21 22:05 UTC (permalink / raw)
  To: davem; +Cc: netdev, anilgv, joe, michaelc, david.somayajulu

[ETH]: Combine format_addr() with print_mac().

print_mac() used by most net drivers and format_addr() used by
net-sysfs.c are very similar and they can be integrated.

format_addr() is also identically redefined in the qla4xxx iscsi
driver.

Export a new function format_mac_addr() to be used by net-sysfs,
qla4xxx and others in the future.  Both print_mac() and
format_mac_addr() call _format_mac_addr() to do the formatting.

Signed-off-by: Michael Chan <mchan@broadcom.com>
Cc: Joe Perches <joe@perches.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: David Somayajulu <david.somayajulu@qlogic.com>

diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 89460d2..0f4562b 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -173,18 +173,6 @@ static void qla4xxx_conn_stop(struct iscsi_cls_conn *conn, int flag)
 		printk(KERN_ERR "iscsi: invalid stop flag %d\n", flag);
 }
 
-static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
-{
-	int i;
-	char *cp = buf;
-
-	for (i = 0; i < len; i++)
-		cp += sprintf(cp, "%02x%c", addr[i],
-			      i == (len - 1) ? '\n' : ':');
-	return cp - buf;
-}
-
-
 static int qla4xxx_host_get_param(struct Scsi_Host *shost,
 				  enum iscsi_host_param param, char *buf)
 {
@@ -193,7 +181,7 @@ static int qla4xxx_host_get_param(struct Scsi_Host *shost,
 
 	switch (param) {
 	case ISCSI_HOST_PARAM_HWADDRESS:
-		len = format_addr(buf, ha->my_mac, MAC_ADDR_LEN);
+		len = format_mac_addr(buf, ha->my_mac, MAC_ADDR_LEN);
 		break;
 	case ISCSI_HOST_PARAM_IPADDRESS:
 		len = sprintf(buf, "%d.%d.%d.%d\n", ha->ip_address[0],
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index cc002cb..d20512c 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -124,10 +124,11 @@ int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr);
 extern struct ctl_table ether_table[];
 #endif
 
+extern ssize_t format_mac_addr(char *buf, const unsigned char *addr, int len);
+
 /*
  *	Display a 6 byte device address (MAC) in a readable format.
  */
-#define MAC_FMT "%02x:%02x:%02x:%02x:%02x:%02x"
 extern char *print_mac(char *buf, const u8 *addr);
 #define DECLARE_MAC_BUF(var) char var[18] __maybe_unused
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e41f4b9..e72993b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -95,17 +95,6 @@ NETDEVICE_SHOW(type, fmt_dec);
 NETDEVICE_SHOW(link_mode, fmt_dec);
 
 /* use same locking rules as GIFHWADDR ioctl's */
-static ssize_t format_addr(char *buf, const unsigned char *addr, int len)
-{
-	int i;
-	char *cp = buf;
-
-	for (i = 0; i < len; i++)
-		cp += sprintf(cp, "%02x%c", addr[i],
-			      i == (len - 1) ? '\n' : ':');
-	return cp - buf;
-}
-
 static ssize_t show_address(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
@@ -114,7 +103,7 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr,
 
 	read_lock(&dev_base_lock);
 	if (dev_isalive(net))
-	    ret = format_addr(buf, net->dev_addr, net->addr_len);
+		ret = format_mac_addr(buf, net->dev_addr, net->addr_len);
 	read_unlock(&dev_base_lock);
 	return ret;
 }
@@ -124,7 +113,7 @@ static ssize_t show_broadcast(struct device *dev,
 {
 	struct net_device *net = to_net_dev(dev);
 	if (dev_isalive(net))
-		return format_addr(buf, net->broadcast, net->addr_len);
+		return format_mac_addr(buf, net->broadcast, net->addr_len);
 	return -EINVAL;
 }
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 6b2e454..f760d41 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -359,10 +359,33 @@ struct net_device *alloc_etherdev_mq(int sizeof_priv, unsigned int queue_count)
 }
 EXPORT_SYMBOL(alloc_etherdev_mq);
 
+static ssize_t _format_mac_addr(char *buf, const unsigned char *addr, int len)
+{
+	int i;
+	char *cp = buf;
+
+	for (i = 0; i < len; i++) {
+		cp += sprintf(cp, "%02x", addr[i]);
+		if (i == len - 1)
+			break;
+		*cp++ = ':';
+	}
+	return cp - buf;
+}
+
+ssize_t format_mac_addr(char *buf, const unsigned char *addr, int len)
+{
+	ssize_t l;
+
+	l = _format_mac_addr(buf, addr, len);
+	strcpy(buf + l, "\n");
+	return l + 1;
+}
+EXPORT_SYMBOL(format_mac_addr);
+
 char *print_mac(char *buf, const u8 *addr)
 {
-	sprintf(buf, MAC_FMT,
-		addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
+	_format_mac_addr(buf, addr, ETH_ALEN);
 	return buf;
 }
 EXPORT_SYMBOL(print_mac);



^ permalink raw reply related

* RE: [PATCH 2/2] phylib: add module owner to the mii_bus structure
From: Medve Emilian @ 2007-12-21 20:55 UTC (permalink / raw)
  To: Nicu Ioan Petru, netdev
In-Reply-To: <1198245451-13929-1-git-send-email-ionut.nicu@freescale.com>

Tested-by: Emil Medve <Emilian.Medve@Freescale.com>


> -----Original Message-----
> From: netdev-owner@vger.kernel.org 
> [mailto:netdev-owner@vger.kernel.org] On Behalf Of Nicu Ioan Petru
> Sent: Friday, December 21, 2007 7:58 AM
> To: netdev@vger.kernel.org
> Cc: Nicu Ioan Petru
> Subject: [PATCH 2/2] phylib: add module owner to the mii_bus structure
> 
> Prevent unloading mii bus driver module when other modules 
> have references to some
> phydevs on that bus. Added a new member (module owner) to 
> struct mii_bus and added
> code to increment the mii bus owner module usage count on 
> phy_connect and decrement
> it on phy_disconnect
> 
> Set the module owner in the ucc_geth_mdio driver.
> 
> Signed-off-by: Ionut Nicu <ionut.nicu@freescale.com>
> ---
>  drivers/net/phy/phy_device.c |    9 ++++++++-
>  drivers/net/ucc_geth_mii.c   |    3 +++
>  include/linux/phy.h          |    4 ++++
>  3 files changed, 15 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/net/phy/phy_device.c 
> b/drivers/net/phy/phy_device.c
> index 5b9e175..7dc5480 100644
> --- a/drivers/net/phy/phy_device.c
> +++ b/drivers/net/phy/phy_device.c
> @@ -178,6 +178,10 @@ struct phy_device * phy_connect(struct 
> net_device *dev, const char *phy_id,
>  	if (phydev->irq > 0)
>  		phy_start_interrupts(phydev);
>  
> +	/* Increment the usage count of the mii bus owner */
> +	if (!try_module_get(phydev->bus->owner))
> +		return ERR_PTR(-EFAULT);
> +
>  	return phydev;
>  }
>  EXPORT_SYMBOL(phy_connect);
> @@ -192,9 +196,12 @@ void phy_disconnect(struct phy_device *phydev)
>  		phy_stop_interrupts(phydev);
>  
>  	phy_stop_machine(phydev);
> -	
> +
>  	phydev->adjust_link = NULL;
>  
> +	/* Decrement the reference count for the mii bus owner */
> +	module_put(phydev->bus->owner);
> +
>  	phy_detach(phydev);
>  }
>  EXPORT_SYMBOL(phy_disconnect);
> diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
> index a3af4ea..84c7295 100644
> --- a/drivers/net/ucc_geth_mii.c
> +++ b/drivers/net/ucc_geth_mii.c
> @@ -217,6 +217,9 @@ static int uec_mdio_probe(struct 
> of_device *ofdev, const struct of_device_id *ma
>  		}
>  	}
>  
> +	/* register ourselves as the owner of this bus */
> +	new_bus->owner = THIS_MODULE;
> +
>  	err = mdiobus_register(new_bus);
>  	if (0 != err) {
>  		printk(KERN_ERR "%s: Cannot register as MDIO bus\n",
> diff --git a/include/linux/phy.h b/include/linux/phy.h
> index 554836e..04ff6a5 100644
> --- a/include/linux/phy.h
> +++ b/include/linux/phy.h
> @@ -82,6 +82,10 @@ struct mii_bus {
>  	const char *name;
>  	int id;
>  	void *priv;
> +
> +	/* The module that owns this bus */
> +	struct module *owner;
> +
>  	int (*read)(struct mii_bus *bus, int phy_id, int regnum);
>  	int (*write)(struct mii_bus *bus, int phy_id, int 
> regnum, u16 val);
>  	int (*reset)(struct mii_bus *bus);
> -- 
> 1.5.4.rc0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* RE: [PATCH 1/2] ucc_geth: split ucc_geth into two modules
From: Medve Emilian @ 2007-12-21 20:55 UTC (permalink / raw)
  To: Nicu Ioan Petru, netdev
In-Reply-To: <1198245443-13911-1-git-send-email-ionut.nicu@freescale.com>

Tested-by: Emil Medve <Emilian.Medve@Freescale.com>


> -----Original Message-----
> From: netdev-owner@vger.kernel.org 
> [mailto:netdev-owner@vger.kernel.org] On Behalf Of Nicu Ioan Petru
> Sent: Friday, December 21, 2007 7:57 AM
> To: netdev@vger.kernel.org
> Cc: Nicu Ioan Petru
> Subject: [PATCH 1/2] ucc_geth: split ucc_geth into two modules
> 
> Split ucc_geth_driver into 2 modules:
> 	- one module for the mii bus (phy devices register to this bus).
> 	- one module for the ethernet driver (uses phy_connect 
> to get a phydev from the mii bus)
> 
> Updated Makefile, Kconfig files and defconfigs (mpc836x, 
> mpc832x_mds, mpc832x_rdb).
> 
> Signed-off-by: Ionut Nicu <ionut.nicu@freescale.com>
> ---
>  arch/powerpc/configs/mpc832x_mds_defconfig |    1 +
>  arch/powerpc/configs/mpc832x_rdb_defconfig |    1 +
>  arch/powerpc/configs/mpc836x_mds_defconfig |    1 +
>  drivers/net/Kconfig                        |    8 ++++++++
>  drivers/net/Makefile                       |    5 ++++-
>  drivers/net/ucc_geth.c                     |   18 +++++-------------
>  drivers/net/ucc_geth_mii.c                 |    9 +++++++++
>  7 files changed, 29 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/configs/mpc832x_mds_defconfig 
> b/arch/powerpc/configs/mpc832x_mds_defconfig
> index 2d8951b..1c51739 100644
> --- a/arch/powerpc/configs/mpc832x_mds_defconfig
> +++ b/arch/powerpc/configs/mpc832x_mds_defconfig
> @@ -500,6 +500,7 @@ CONFIG_NETDEV_1000=y
>  # CONFIG_TIGON3 is not set
>  # CONFIG_BNX2 is not set
>  # CONFIG_GIANFAR is not set
> +CONFIG_UCC_MDIO=y
>  CONFIG_UCC_GETH=y
>  # CONFIG_UGETH_NAPI is not set
>  # CONFIG_UGETH_MAGIC_PACKET is not set
> diff --git a/arch/powerpc/configs/mpc832x_rdb_defconfig 
> b/arch/powerpc/configs/mpc832x_rdb_defconfig
> index 761718a..cb4d076 100644
> --- a/arch/powerpc/configs/mpc832x_rdb_defconfig
> +++ b/arch/powerpc/configs/mpc832x_rdb_defconfig
> @@ -503,6 +503,7 @@ CONFIG_E1000=y
>  # CONFIG_TIGON3 is not set
>  # CONFIG_BNX2 is not set
>  # CONFIG_GIANFAR is not set
> +CONFIG_UCC_MDIO=y
>  CONFIG_UCC_GETH=y
>  CONFIG_UGETH_NAPI=y
>  # CONFIG_UGETH_MAGIC_PACKET is not set
> diff --git a/arch/powerpc/configs/mpc836x_mds_defconfig 
> b/arch/powerpc/configs/mpc836x_mds_defconfig
> index c44fc56..92166e9 100644
> --- a/arch/powerpc/configs/mpc836x_mds_defconfig
> +++ b/arch/powerpc/configs/mpc836x_mds_defconfig
> @@ -499,6 +499,7 @@ CONFIG_NETDEV_1000=y
>  # CONFIG_TIGON3 is not set
>  # CONFIG_BNX2 is not set
>  # CONFIG_GIANFAR is not set
> +CONFIG_UCC_MDIO=y
>  CONFIG_UCC_GETH=y
>  # CONFIG_UGETH_NAPI is not set
>  # CONFIG_UGETH_MAGIC_PACKET is not set
> diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
> index d9107e5..7314802 100644
> --- a/drivers/net/Kconfig
> +++ b/drivers/net/Kconfig
> @@ -2315,9 +2315,17 @@ config GFAR_NAPI
>  	bool "Use Rx Polling (NAPI)"
>  	depends on GIANFAR
>  
> +config UCC_MDIO
> +	tristate "Freescale QE UCC MDIO Bus"
> +	depends on QUICC_ENGINE
> +	select PHYLIB
> +	help
> +	  Provides Bus interface for MII Management regs in the 
> UCC register space.
> +
>  config UCC_GETH
>  	tristate "Freescale QE Gigabit Ethernet"
>  	depends on QUICC_ENGINE
> +	select UCC_MDIO
>  	select PHYLIB
>  	help
>  	  This driver supports the Gigabit Ethernet mode of the 
> QUICC Engine,
> diff --git a/drivers/net/Makefile b/drivers/net/Makefile
> index 0e5fde4..97843a3 100644
> --- a/drivers/net/Makefile
> +++ b/drivers/net/Makefile
> @@ -22,8 +22,11 @@ gianfar_driver-objs := gianfar.o \
>  		gianfar_mii.o \
>  		gianfar_sysfs.o
>  
> +obj-$(CONFIG_UCC_MDIO) += ucc_geth_mdio.o
> +ucc_geth_mdio-objs := ucc_geth_mii.o
> +
>  obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o
> -ucc_geth_driver-objs := ucc_geth.o ucc_geth_mii.o ucc_geth_ethtool.o
> +ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o
>  
>  #
>  # link order important here
> diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
> index c6a1902..c33a4cb 100644
> --- a/drivers/net/ucc_geth.c
> +++ b/drivers/net/ucc_geth.c
> @@ -1612,9 +1612,12 @@ static int init_phy(struct net_device *dev)
>  	priv->oldspeed = 0;
>  	priv->oldduplex = -1;
>  
> +	request_module("ucc_geth_mdio");
> +
>  	snprintf(phy_id, BUS_ID_SIZE, PHY_ID_FMT, 
> priv->ug_info->mdio_bus,
>  			priv->ug_info->phy_address);
>  
> +
>  	phydev = phy_connect(dev, phy_id, &adjust_link, 0, 
> priv->phy_interface);
>  
>  	if (IS_ERR(phydev)) {
> @@ -4025,12 +4028,7 @@ static struct of_platform_driver 
> ucc_geth_driver = {
>  
>  static int __init ucc_geth_init(void)
>  {
> -	int i, ret;
> -
> -	ret = uec_mdio_init();
> -
> -	if (ret)
> -		return ret;
> +	int i;
>  
>  	if (netif_msg_drv(&debug))
>  		printk(KERN_INFO "ucc_geth: " DRV_DESC "\n");
> @@ -4038,18 +4036,12 @@ static int __init ucc_geth_init(void)
>  		memcpy(&(ugeth_info[i]), &ugeth_primary_info,
>  		       sizeof(ugeth_primary_info));
>  
> -	ret = of_register_platform_driver(&ucc_geth_driver);
> -
> -	if (ret)
> -		uec_mdio_exit();
> -
> -	return ret;
> +	return of_register_platform_driver(&ucc_geth_driver);
>  }
>  
>  static void __exit ucc_geth_exit(void)
>  {
>  	of_unregister_platform_driver(&ucc_geth_driver);
> -	uec_mdio_exit();
>  }
>  
>  module_init(ucc_geth_init);
> diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
> index df884f0..a3af4ea 100644
> --- a/drivers/net/ucc_geth_mii.c
> +++ b/drivers/net/ucc_geth_mii.c
> @@ -17,6 +17,7 @@
>   */
>  
>  #include <linux/kernel.h>
> +#include <linux/module.h>
>  #include <linux/sched.h>
>  #include <linux/string.h>
>  #include <linux/errno.h>
> @@ -276,3 +277,11 @@ void uec_mdio_exit(void)
>  {
>  	of_unregister_platform_driver(&uec_mdio_driver);
>  }
> +
> +module_init(uec_mdio_init);
> +module_exit(uec_mdio_exit);
> +
> +MODULE_AUTHOR("Freescale Semiconductor, Inc");
> +MODULE_DESCRIPTION("QE UCC MDIO Bus Implementation");
> +MODULE_VERSION("1.0");
> +MODULE_LICENSE("GPL");
> -- 
> 1.5.4.rc0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCH] [TCP]: Force TSO splits to MSS boundaries
From: Ilpo Järvinen @ 2007-12-21 20:01 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Netdev
In-Reply-To: <Pine.LNX.4.64.0712211953580.7301@kivilampi-30.cs.helsinki.fi>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3013 bytes --]

On Fri, 21 Dec 2007, Ilpo Järvinen wrote:

> How about this, I had to use another approach due to reasons
> outlined in the commit message:
> 
> --
> [PATCH] [TCP]: Force TSO splits to MSS boundaries
> 
> If snd_wnd - snd_nxt wasn't multiple of MSS, skb was split on
> odd boundary by the callers of tcp_window_allows.
> 
> We try really hard to avoid unnecessary modulos. Therefore the
> old caller side check "if (skb->len < limit)" was too wide as
> well because limit is not bound in any way to skb->len and can
> cause spurious testing for trimming in the middle of the queue
> while we only wanted that to happen at the tail of the queue.
> A simple additional caller side check for tcp_write_queue_tail
> would likely have resulted 2 x modulos because the limit would
> have to be first calculated from window, however, doing that
> unnecessary modulo is not mandatory. After a minor change to
> the algorithm, simply determine first if the modulo is needed
> at all and at that point immediately decide also from which
> value it should be calculated from.
> 
> This approach also kills some duplicated code.
> 
> Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
> ---
>  net/ipv4/tcp_output.c |   51 ++++++++++++++++++++++++-------------------------
>  1 files changed, 25 insertions(+), 26 deletions(-)
> 
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 1852698..ea92a1b 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -1008,13 +1008,29 @@ static void tcp_cwnd_validate(struct sock *sk)
>  	}
>  }
>  
> -static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
> +/* Returns the portion of skb which can be sent right away without
> + * introducing MSS oddities to segment boundaries. In rare cases where
> + * mss_now != mss_cache, we will request caller to create a small skb
> + * per input skb which could be mostly avoided here (if desired).
> + */
> +static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
> +					unsigned int mss_now,
> +					unsigned int cwnd)
>  {
> -	u32 window, cwnd_len;
> +	struct tcp_sock *tp = tcp_sk(sk);
> +	u32 needed, window, cwnd_len;
>  
>  	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
>  	cwnd_len = mss_now * cwnd;
> -	return min(window, cwnd_len);
> +
> +	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
> +		return cwnd_len;
> +
> +	if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
> +		return cwnd_len;

...if desired that this won't cause small skbs in the middle of the queue 
(except in case where just the sub-MSS portion is left out above window), 
something like this could be added here (again, avoiding more modulos):

	if (skb != tcp_write_queue_tail(sk) && window > skb->len)
		return skb->len;

> +	needed = min(skb->len, window);
> +	return needed - needed % mss_now;
>  }
>  
>  /* Can at least one segment of SKB be sent right now, according to the

-- 
 i.

^ permalink raw reply

* Re: TSO trimming question
From: Bill Fink @ 2007-12-21 19:37 UTC (permalink / raw)
  To: Ilpo Järvinen ; +Cc: David Miller, Herbert Xu, Netdev, John Heffner
In-Reply-To: <Pine.LNX.4.64.0712212056220.31652@kivilampi-30.cs.helsinki.fi>

On Fri, 21 Dec 2007, Ilpo Järvinen wrote:

> On Fri, 21 Dec 2007, Bill Fink wrote:
> 
> > On Fri, 21 Dec 2007, Bill Fink wrote:
> > 
> > > Or perhaps even:
> > > 
> > > 	/* Ok, it looks like it is advisable to defer.  */
> > > 	tp->tso_deferred = jiffies;
> > > 
> > > 	/* need to return a non-zero value to defer, which means won't
> > > 	 * defer if jiffies == 0 but it's only a 1 in 4 billion event
> > > 	 * (and avoids a compare/branch by not checking jiffies)
> > > 	 /
> > >         return jiffies;
> > 
> > Ack.  I introduced my own 64-bit to 32-bit issue (too late at night).
> > How about:
> > 
> > 	/* Ok, it looks like it is advisable to defer.  */
> > 	tp->tso_deferred = jiffies;
> > 
> > 	/* this won't defer if jiffies == 0 but it's only a 1 in
> > 	 * 4 billion event (and avoids a branch)
> > 	 */
> > 	return (jiffies != 0);
> 
> I'm not sure how the jiffies work but is this racy as well?
> 
> Simple return tp->tso_deferred; should work, shouldn't it? :-)

As long as tp->tso_deferred remains u32, pending the other issue.

						-Bill

^ permalink raw reply

* Re: TSO trimming question
From: Ilpo Järvinen @ 2007-12-21 18:58 UTC (permalink / raw)
  To: Bill Fink; +Cc: David Miller, Herbert Xu, Netdev, John Heffner
In-Reply-To: <20071221135437.16fa63c8.billfink@mindspring.com>

On Fri, 21 Dec 2007, Bill Fink wrote:

> On Fri, 21 Dec 2007, Bill Fink wrote:
> 
> > Or perhaps even:
> > 
> > 	/* Ok, it looks like it is advisable to defer.  */
> > 	tp->tso_deferred = jiffies;
> > 
> > 	/* need to return a non-zero value to defer, which means won't
> > 	 * defer if jiffies == 0 but it's only a 1 in 4 billion event
> > 	 * (and avoids a compare/branch by not checking jiffies)
> > 	 /
> >         return jiffies;
> 
> Ack.  I introduced my own 64-bit to 32-bit issue (too late at night).
> How about:
> 
> 	/* Ok, it looks like it is advisable to defer.  */
> 	tp->tso_deferred = jiffies;
> 
> 	/* this won't defer if jiffies == 0 but it's only a 1 in
> 	 * 4 billion event (and avoids a branch)
> 	 */
> 	return (jiffies != 0);

I'm not sure how the jiffies work but is this racy as well?

Simple return tp->tso_deferred; should work, shouldn't it? :-)


-- 
 i.

^ permalink raw reply

* [PATCH] [TCP]: Force TSO splits to MSS boundaries
From: Ilpo Järvinen @ 2007-12-21 18:55 UTC (permalink / raw)
  To: David Miller; +Cc: Herbert Xu, Netdev
In-Reply-To: <20071220.155518.42408378.davem@davemloft.net>

[-- Attachment #1: Type: TEXT/PLAIN, Size: 4791 bytes --]

On Thu, 20 Dec 2007, David Miller wrote:

> From: Herbert Xu <herbert@gondor.apana.org.au>
> Date: Thu, 20 Dec 2007 22:00:12 +0800
> 
> > On Thu, Dec 20, 2007 at 04:00:37AM -0800, David Miller wrote:
> > >
> > > In the most ideal sense, tcp_window_allows() should probably
> > > be changed to only return MSS multiples.
> > > 
> > > Unfortunately this would add an expensive modulo operation,
> > > however I think it would elimiate this problem case.
> > 
> > Well you only have to divide in the unlikely case of us being
> > limited by the receiver window.  In that case speed is probably
> > not of the essence anyway.
> 
> Agreed, to some extent.
> 
> I say "to some extent" because it might be realistic, with
> lots (millions) of sockets to hit this case a lot.
> 
> There are so many things that are a "don't care" performance
> wise until you have a lot of stinky connections over crappy
> links.

How about this, I had to use another approach due to reasons
outlined in the commit message:

--
[PATCH] [TCP]: Force TSO splits to MSS boundaries

If snd_wnd - snd_nxt wasn't multiple of MSS, skb was split on
odd boundary by the callers of tcp_window_allows.

We try really hard to avoid unnecessary modulos. Therefore the
old caller side check "if (skb->len < limit)" was too wide as
well because limit is not bound in any way to skb->len and can
cause spurious testing for trimming in the middle of the queue
while we only wanted that to happen at the tail of the queue.
A simple additional caller side check for tcp_write_queue_tail
would likely have resulted 2 x modulos because the limit would
have to be first calculated from window, however, doing that
unnecessary modulo is not mandatory. After a minor change to
the algorithm, simply determine first if the modulo is needed
at all and at that point immediately decide also from which
value it should be calculated from.

This approach also kills some duplicated code.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@helsinki.fi>
---
 net/ipv4/tcp_output.c |   51 ++++++++++++++++++++++++-------------------------
 1 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1852698..ea92a1b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1008,13 +1008,29 @@ static void tcp_cwnd_validate(struct sock *sk)
 	}
 }
 
-static unsigned int tcp_window_allows(struct tcp_sock *tp, struct sk_buff *skb, unsigned int mss_now, unsigned int cwnd)
+/* Returns the portion of skb which can be sent right away without
+ * introducing MSS oddities to segment boundaries. In rare cases where
+ * mss_now != mss_cache, we will request caller to create a small skb
+ * per input skb which could be mostly avoided here (if desired).
+ */
+static unsigned int tcp_mss_split_point(struct sock *sk, struct sk_buff *skb,
+					unsigned int mss_now,
+					unsigned int cwnd)
 {
-	u32 window, cwnd_len;
+	struct tcp_sock *tp = tcp_sk(sk);
+	u32 needed, window, cwnd_len;
 
 	window = (tp->snd_una + tp->snd_wnd - TCP_SKB_CB(skb)->seq);
 	cwnd_len = mss_now * cwnd;
-	return min(window, cwnd_len);
+
+	if (likely(cwnd_len <= window && skb != tcp_write_queue_tail(sk)))
+		return cwnd_len;
+
+	if (skb == tcp_write_queue_tail(sk) && cwnd_len <= skb->len)
+		return cwnd_len;
+
+	needed = min(skb->len, window);
+	return needed - needed % mss_now;
 }
 
 /* Can at least one segment of SKB be sent right now, according to the
@@ -1445,17 +1461,9 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
 		}
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
@@ -1502,7 +1510,6 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
  */
 void tcp_push_one(struct sock *sk, unsigned int mss_now)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb = tcp_send_head(sk);
 	unsigned int tso_segs, cwnd_quota;
 
@@ -1517,17 +1524,9 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
 		BUG_ON(!tso_segs);
 
 		limit = mss_now;
-		if (tso_segs > 1) {
-			limit = tcp_window_allows(tp, skb,
-						  mss_now, cwnd_quota);
-
-			if (skb->len < limit) {
-				unsigned int trim = skb->len % mss_now;
-
-				if (trim)
-					limit = skb->len - trim;
-			}
-		}
+		if (tso_segs > 1)
+			limit = tcp_mss_split_point(sk, skb, mss_now,
+						    cwnd_quota);
 
 		if (skb->len > limit &&
 		    unlikely(tso_fragment(sk, skb, limit, mss_now)))
-- 
1.5.0.6

^ permalink raw reply related

* Re: TSO trimming question
From: Bill Fink @ 2007-12-21 18:54 UTC (permalink / raw)
  To: Bill Fink; +Cc: David Miller, herbert, ilpo.jarvinen, netdev, jheffner
In-Reply-To: <20071221055822.8d977800.billfink@mindspring.com>

On Fri, 21 Dec 2007, Bill Fink wrote:

> Or perhaps even:
> 
> 	/* Ok, it looks like it is advisable to defer.  */
> 	tp->tso_deferred = jiffies;
> 
> 	/* need to return a non-zero value to defer, which means won't
> 	 * defer if jiffies == 0 but it's only a 1 in 4 billion event
> 	 * (and avoids a compare/branch by not checking jiffies)
> 	 /
>         return jiffies;

Ack.  I introduced my own 64-bit to 32-bit issue (too late at night).
How about:

	/* Ok, it looks like it is advisable to defer.  */
	tp->tso_deferred = jiffies;

	/* this won't defer if jiffies == 0 but it's only a 1 in
	 * 4 billion event (and avoids a branch)
	 */
	return (jiffies != 0);

							-Bill

^ permalink raw reply

* Re: [PATCH]  New driver "sfc" for Solarstorm SFC4000 controller - 3nd try
From: Ben Hutchings @ 2007-12-21 18:30 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: netdev, linux-net-drivers
In-Reply-To: <20071221100253.fb5b0899.randy.dunlap@oracle.com>

Randy Dunlap wrote:
> On Fri, 21 Dec 2007 16:53:40 +0000 Robert Stonehouse wrote:
> 
> > This is a resubmission of a new driver for Solarflare network controllers.
<snip>
> > The last two patches were marked with RFC but I now think that this driver
> > is ready (withstanding any further review comments) and I would like to ask
> > that this driver is considered for merging.
> > 
> > 
> > The patch (against net-2.6.25) is at:
> >      https://support.solarflare.com/netdev/3/net-2.6.25-sfc-2.2.0029.patch
> 
> wow, 750+ KB
> 
> How many drivers is this?

Just two: sfc (net) and sfc_mtd (MTD).  As Robert said, the net driver
supports a variety of PHYs.  It also has a fair amount of self-test
and (conditional) debug code, and comments.

I've noted your comments on kernel-doc and Kconfig format and will
address those in the next version.

<snip>
> c.  Driver contains MTD, SPI, & I2C (at least) code and needs to be
> reviewed by people in those areas as well (IMO).
> 
> I see an MTD dependency in the Kconfig file.
> What about the SPI and I2C parts?  Are they conditional or
> how is that handled?  or does the driver not use the kernel
> infrastructure for these?

We are not currently using the kernel infrastructure for those.
I'm not sure whether we could do.

I think everyone working on the net driver here will be away until
the new year, so please forgive our silence in the mean time.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare Communications
Not speaking for my employer; that's the marketing department's job.

^ permalink raw reply

* [PATCH] net: santize headers for iproute2
From: Stephen Hemminger @ 2007-12-21 17:43 UTC (permalink / raw)
  To: David Miller, Vitaliy Gusev; +Cc: netdev

iproute2 source uses headers that result from "make headers_install".
The header files net/tcp_states.h and net/veth.h are both being used
but are not processed. 

Signed-off-by: Stephen Hemminger <stephen.hemminger@vyatta.com>

---
This patch is not "urgent" but it would be good to get it in 2.6.24.

--- a/include/Kbuild	2007-12-21 09:33:36.000000000 -0800
+++ b/include/Kbuild	2007-12-21 09:33:42.000000000 -0800
@@ -4,5 +4,6 @@ header-y += sound/
 header-y += mtd/
 header-y += rdma/
 header-y += video/
+header-y += net/
 
 header-y += asm-$(ARCH)/
--- /dev/null	1970-01-01 00:00:00.000000000 +0000
+++ b/include/net/Kbuild	2007-12-21 09:32:20.000000000 -0800
@@ -0,0 +1,2 @@
+header-y += tcp_states.h
+header-y += veth.h

^ permalink raw reply

* Re: [PATCH]  New driver "sfc" for Solarstorm SFC4000 controller - 3nd try
From: Randy Dunlap @ 2007-12-21 18:02 UTC (permalink / raw)
  To: Robert Stonehouse; +Cc: netdev, linux-net-drivers, spope
In-Reply-To: <476BEF94.4000801@solarflare.com>

On Fri, 21 Dec 2007 16:53:40 +0000 Robert Stonehouse wrote:

> This is a resubmission of a new driver for Solarflare network controllers.
> 
> The driver supports several types of PHY (10Gbase-T, XFP, CX4) on six
> different 10G and 1G boards.
> 
> The previous thread was:
>   [PATCH] [RFC] New driver "sfc" for Solarstorm SFC4000 controller
>   http://marc.info/?l=linux-netdev&m=119757352830103&w=2
> 
> Thanks to the people who looked at the previous patches. We have addressed
> the following from received comments after the 2nd submission:
>   - Use kzalloc where appropriate
>   - Remove deprecated fastcall attributes
>   - Use kernel routines for hex dumps and MAC address printing
>   - Changes to logging MACROs
> 
> The last two patches were marked with RFC but I now think that this driver
> is ready (withstanding any further review comments) and I would like to ask
> that this driver is considered for merging.
> 
> 
> The patch (against net-2.6.25) is at:
>      https://support.solarflare.com/netdev/3/net-2.6.25-sfc-2.2.0029.patch

wow, 750+ KB

How many drivers is this?


> The new files may also be downloaded as a tarball:
>      https://support.solarflare.com/netdev/3/net-2.6.25-sfc-2.2.0029.tgz
> 
> And for verification there is:
>      https://support.solarflare.com/netdev/3/MD5SUMS


Hi,
I have just a few comments, mostly related to infrastructure.

a.  Don't use kernel-doc comment flags (i.e., "/**" to begin a comment
block) when the comment is not in kernel-doc format.  Or use
kernel-doc format.  :)
(as documented in Documentation/kernel-doc-nano-HOWTO.txt)

E.g.:
+/** @file
+ *
+ * Efx driverlink
+ *
+ * This header file defines the portions of the Efx driverlink
+ * interface that are used only by the kernel net driver, and are not
+ * part of the public interface specification.
+ */

b.  Kconfig file:

Use 1 tab (not 8 spaces) to indent below "config" items.
Use 1 tab + 2 spaces to indent help text below "help" lines.


c.  Driver contains MTD, SPI, & I2C (at least) code and needs to be
reviewed by people in those areas as well (IMO).

I see an MTD dependency in the Kconfig file.
What about the SPI and I2C parts?  Are they conditional or
how is that handled?  or does the driver not use the kernel
infrastructure for these?


---
~Randy

^ permalink raw reply

* Re: [PATCH net-2.6.25 3/3] Uninline the inet_twsk_put function
From: Ingo Oeser @ 2007-12-21 17:53 UTC (permalink / raw)
  To: David Miller; +Cc: xemul, netdev, devel
In-Reply-To: <20071220.160840.140150524.davem@davemloft.net>

Hi David,

David Miller schrieb:
> "inet_timewait_sock" begins with a "struct sock_common"
> which is where the atomic_t is, and:
> 
> #define tw_refcnt		__tw_common.skc_refcnt
> 
> So you would have to change struct sock_common over to kref, and thus
> the entire networking, in order to make such a change.

Ok, that sounds too much. Many thanks for following up and taking the time
to explain it.
 
> But you would have seen this instantly if you had spent 5 seconds
> looking at how these datastructures are defined.  Instead you choose
> to make me do it and explain it to you instead.

Sorry, just matched the wrong pattern here :-)


Best Regards

Ingo Oeser

^ permalink raw reply

* Re: BNX2 warning
From: Michael Chan @ 2007-12-21 18:34 UTC (permalink / raw)
  To: David Miller; +Cc: netdev
In-Reply-To: <20071220.203935.30925934.davem@davemloft.net>

On Thu, 2007-12-20 at 20:39 -0800, David Miller wrote:

> Michael, please fix this, thanks :-)
> 
> drivers/net/bnx2.c: In function 'bnx2_init_napi':
> drivers/net/bnx2.c:7329: warning: no return statement in function returning non-void
> 

[BNX2]: Fix compiler warning.

Change bnx2_init_napi() to void.

Warning was noted by DaveM.

Signed-off-by: Michael Chan <mchan@broadcom.com>

diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index aef2a9a..94d1857 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -7313,7 +7313,7 @@ bnx2_bus_string(struct bnx2 *bp, char *str)
 	return str;
 }
 
-static int __devinit
+static void __devinit
 bnx2_init_napi(struct bnx2 *bp)
 {
 	int i;



^ permalink raw reply related

* Re: [PATCH 2/2] Module for ip utility to support veth device
From: Stephen Hemminger @ 2007-12-21 17:38 UTC (permalink / raw)
  To: Vitaliy Gusev; +Cc: netdev, Pavel Emelyanov, Patrick McHardy
In-Reply-To: <200712181515.38565.vgusev@openvz.org>

On Tue, 18 Dec 2007 15:15:38 +0300
Vitaliy Gusev <vgusev@openvz.org> wrote:

> 
> module link_veth
> 
> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
> 
> ---
> 

Applied both patches, and moved veth.h from ip/veth.h to include/net/veth.h
so that the header file will be correctly updated if ABI changes.

-- 
Stephen Hemminger <stephen.hemminger@vyatta.com>

^ permalink raw reply

* Re: [PATCH] Fix lost export-dynamic
From: Stephen Hemminger @ 2007-12-21 17:17 UTC (permalink / raw)
  To: Vitaliy Gusev; +Cc: Patrick McHardy, netdev
In-Reply-To: <200712171606.38546.vgusev@openvz.org>

On Mon, 17 Dec 2007 16:06:38 +0300
Vitaliy Gusev <vgusev@openvz.org> wrote:

> get_link_kind() fails for statically linked modules (vlan, veth, etc.) if "ip" 
> was linked without "export-dynamic".
> 
> Signed-off-by: Vitaliy Gusev <vgusev@openvz.org>
> 

applied thanks

-- 
Stephen Hemminger <stephen.hemminger@vyatta.com>

^ permalink raw reply

* Re: [PATCH] [IPROUTE]: A workaround to make larger rto_min printed correctly
From: Stephen Hemminger @ 2007-12-21 17:14 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki / 吉藤英明
  Cc: satoru.satoh, netdev, yoshfuji
In-Reply-To: <20071221.225804.129665785.yoshfuji@linux-ipv6.org>

On Fri, 21 Dec 2007 22:58:04 +0900 (JST)
YOSHIFUJI Hideaki / 吉藤英明 <yoshfuji@linux-ipv6.org> wrote:

> In article <d72b7ace0712210549u51043bf8k585fd70e1ded7d8a@mail.gmail.com> (at Fri, 21 Dec 2007 22:49:59 +0900), "Satoru SATOH" <satoru.satoh@gmail.com> says:
> 
> > I agree.
> > 
> > I mistakenly thought hz in that context must be larger than 1000..
> > As it's uncertain, your's looks much simpler and better.
> > 
> > (btw, the lines "else .... div = 1" is not needed, is it?)
> 
> Simplest fix is as follows:
> 
> Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
> --
> diff --git a/ip/iproute.c b/ip/iproute.c
> index f4200ae..7a885b0 100644
> --- a/ip/iproute.c
> +++ b/ip/iproute.c
> @@ -509,7 +509,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
>  			    i != RTAX_RTO_MIN)
>  				fprintf(fp, " %u", *(unsigned*)RTA_DATA(mxrta[i]));
>  			else {
> -				unsigned val = *(unsigned*)RTA_DATA(mxrta[i]);
> +				unsigned long long val = *(unsigned*)RTA_DATA(mxrta[i]);
>  
>  				val *= 1000;
>  				if (i == RTAX_RTT)
> @@ -517,7 +517,7 @@ int print_route(const struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
>  				else if (i == RTAX_RTTVAR)
>  					val /= 4;
>  				if (val >= hz)
> -					fprintf(fp, " %ums", val/hz);
> +					fprintf(fp, " %llums", val/hz);
>  				else
>  					fprintf(fp, " %.2fms", (float)val/hz);
>  			}
> 

applied thanks.

-- 
Stephen Hemminger <stephen.hemminger@vyatta.com>

^ permalink raw reply

* [PATCH]  New driver "sfc" for Solarstorm SFC4000 controller - 3nd try
From: Robert Stonehouse @ 2007-12-21 16:53 UTC (permalink / raw)
  To: netdev; +Cc: linux-net-drivers, spope

This is a resubmission of a new driver for Solarflare network controllers.

The driver supports several types of PHY (10Gbase-T, XFP, CX4) on six
different 10G and 1G boards.

The previous thread was:
  [PATCH] [RFC] New driver "sfc" for Solarstorm SFC4000 controller
  http://marc.info/?l=linux-netdev&m=119757352830103&w=2

Thanks to the people who looked at the previous patches. We have addressed
the following from received comments after the 2nd submission:
  - Use kzalloc where appropriate
  - Remove deprecated fastcall attributes
  - Use kernel routines for hex dumps and MAC address printing
  - Changes to logging MACROs

The last two patches were marked with RFC but I now think that this driver
is ready (withstanding any further review comments) and I would like to ask
that this driver is considered for merging.


The patch (against net-2.6.25) is at:
     https://support.solarflare.com/netdev/3/net-2.6.25-sfc-2.2.0029.patch

The new files may also be downloaded as a tarball:
     https://support.solarflare.com/netdev/3/net-2.6.25-sfc-2.2.0029.tgz

And for verification there is:
     https://support.solarflare.com/netdev/3/MD5SUMS


Happy Christmas

-- 
Robert Stonehouse

^ permalink raw reply

* Re: Top 10 kernel oopses/warnings for the week of December 21st 2007
From: Andi Kleen @ 2007-12-21 16:22 UTC (permalink / raw)
  To: Arjan van de Ven
  Cc: Andi Kleen, linux-kernel, akpm, torvalds, netdev, Marcel Holtmann,
	Pallipadi, Venkatesh
In-Reply-To: <476BCE15.6090902@linux.intel.com>

> in this case this is really all the version information available ;(
> it seems to be a patched kernel without patched EXTRAVERSION.
> But in the future if I have more specific information (eg it's only 1 
> kernel version) I'll mention it in more detail.
> It gets unwieldy if there's 500 reports for an oops of course ;)

Hmm would there be an automatic way to check out the file of the 
kernel version and then check if the BUG_ON/WARN_ON is on that line?
Maybe it could be done using git.

> 
> >
> >Anyways there are a lot of third party modules who do strange 
> >things with c_p_a(), not always legal, so you might look up out for that 
> >pattern too. Perhaps report the out of tree modules loaded in the 
> >summary too?
> 
> I already always will mention if the oops is tainted or not (that I track 
> specifically);

I don't necessarily mean tainted, just out of tree modules in general.
There are some GPL modules who do strange things too. Not saying
that these oopses should be all ignored -- they might be legitimate
kernel bugs that they just trigger -- just it should be visible somehow
in the summary in case there is a pattern. 

Especially for c_p_a() i'm quite suspicious
because it depends a lot on what the caller did.

One way perhaps would be also to check if there is an out of tree
module inside the backtrace. I suppose you could keep a list 
of in tree modules and do this automatically. Of course there could
be false positives too with the standard inexact backtrace.

-Andi


^ permalink raw reply

* Hello
From: fdgv fg @ 2007-12-21 15:38 UTC (permalink / raw)
  To: netdev, anonymous

Dear Sir / Madam:
   Nice to meet you.Here is a more useful information if you are
searching any high-quality digital products with a lower price.We have
a massive stock where you nearly can find what you want.We strive to
offer you best sevice! With the advanced transportation,we assure you
a safe and fast shippment! please contact me.
my website: www.elec-offer.com
MSN: elec-offer@hotmail.com
Best regards
Sincerely yours
Li

^ permalink raw reply

* Re: [XFRM]: Fix outbound statistics.
From: Herbert Xu @ 2007-12-21 15:11 UTC (permalink / raw)
  To: Masahide NAKAMURA; +Cc: davem, netdev, usagi-core
In-Reply-To: <1198247100998-git-send-email-nakam@linux-ipv6.org>

On Fri, Dec 21, 2007 at 11:25:00PM +0900, Masahide NAKAMURA wrote:
>
>  	do {
>  		err = xfrm_state_check_space(x, skb);
> -		if (err)
> +		if (err) {
> +			XFRM_INC_STATS(LINUX_MIB_XFRMOUTERROR);
>  			goto error_nolock;
> +		}
>  
>  		err = x->outer_mode->output(x, skb);
> -		if (err)
> +		if (err) {
> +			XFRM_INC_STATS(LINUX_MIB_XFRMOUTSTATEMODEERROR);

BTW, none of our existing mode output functions actually return
an error.  I noticed that the description for this field is actually
"Transformation mode specific error, e.g. Outer header space is not
enough".  This is slightly misleading as output header space is
checked by xfrm_state_check_space so if there's an error that's
where it'll show up.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmV>HI~} <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply

* [PATCH 2/2] phylib: add module owner to the mii_bus structure
From: Ionut Nicu @ 2007-12-21 13:57 UTC (permalink / raw)
  To: netdev; +Cc: Ionut Nicu

Prevent unloading mii bus driver module when other modules have references to some
phydevs on that bus. Added a new member (module owner) to struct mii_bus and added
code to increment the mii bus owner module usage count on phy_connect and decrement
it on phy_disconnect

Set the module owner in the ucc_geth_mdio driver.

Signed-off-by: Ionut Nicu <ionut.nicu@freescale.com>
---
 drivers/net/phy/phy_device.c |    9 ++++++++-
 drivers/net/ucc_geth_mii.c   |    3 +++
 include/linux/phy.h          |    4 ++++
 3 files changed, 15 insertions(+), 1 deletions(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 5b9e175..7dc5480 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -178,6 +178,10 @@ struct phy_device * phy_connect(struct net_device *dev, const char *phy_id,
 	if (phydev->irq > 0)
 		phy_start_interrupts(phydev);
 
+	/* Increment the usage count of the mii bus owner */
+	if (!try_module_get(phydev->bus->owner))
+		return ERR_PTR(-EFAULT);
+
 	return phydev;
 }
 EXPORT_SYMBOL(phy_connect);
@@ -192,9 +196,12 @@ void phy_disconnect(struct phy_device *phydev)
 		phy_stop_interrupts(phydev);
 
 	phy_stop_machine(phydev);
-	
+
 	phydev->adjust_link = NULL;
 
+	/* Decrement the reference count for the mii bus owner */
+	module_put(phydev->bus->owner);
+
 	phy_detach(phydev);
 }
 EXPORT_SYMBOL(phy_disconnect);
diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
index a3af4ea..84c7295 100644
--- a/drivers/net/ucc_geth_mii.c
+++ b/drivers/net/ucc_geth_mii.c
@@ -217,6 +217,9 @@ static int uec_mdio_probe(struct of_device *ofdev, const struct of_device_id *ma
 		}
 	}
 
+	/* register ourselves as the owner of this bus */
+	new_bus->owner = THIS_MODULE;
+
 	err = mdiobus_register(new_bus);
 	if (0 != err) {
 		printk(KERN_ERR "%s: Cannot register as MDIO bus\n",
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 554836e..04ff6a5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -82,6 +82,10 @@ struct mii_bus {
 	const char *name;
 	int id;
 	void *priv;
+
+	/* The module that owns this bus */
+	struct module *owner;
+
 	int (*read)(struct mii_bus *bus, int phy_id, int regnum);
 	int (*write)(struct mii_bus *bus, int phy_id, int regnum, u16 val);
 	int (*reset)(struct mii_bus *bus);
-- 
1.5.4.rc0


^ permalink raw reply related

* [patch net-2.6.25 0/2][NETNS] prepare the neigh subsys to handle network namespaces v2
From: Daniel Lezcano @ 2007-12-21 13:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, den, benjamin.thery

These two patches are coming from Eric Biederman patchset
to make the neighbouring aware of the namespaces.

The description in the patch headers is big enough to add
more comments here  :) 

Changelog:
 v2 :
	fixed bad coding style
 v1 : 
	initial post

-- 

^ permalink raw reply

* [patch net-2.6.25 2/2][NETNS] net: Add a helper function neigh_param_default_alloc
From: Daniel Lezcano @ 2007-12-21 13:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, den, benjamin.thery, Eric W. Biederman
In-Reply-To: <20071221135922.726347008@ICON-9-164-138-215.megacenter.de.ibm.com>

[-- Attachment #1: 0002-net-Add-a-helper-function-neigh_param_default_alloc.patch --]
[-- Type: text/plain, Size: 2127 bytes --]

In the presence of multiple network namespaces the logic needed
to allocate the a default parameter table is just barely non-trivial.
So add a function to automate it to make everyone's life easier.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/neighbour.h |    1 +
 net/core/neighbour.c    |   15 +++++++++++++++
 2 files changed, 16 insertions(+)

Index: net-2.6.25/include/net/neighbour.h
===================================================================
--- net-2.6.25.orig/include/net/neighbour.h
+++ net-2.6.25/include/net/neighbour.h
@@ -208,6 +208,7 @@ extern struct neighbour 	*neigh_event_ns
 						struct net_device *dev);
 
 extern struct neigh_parms	*neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
+extern struct neigh_parms	*neigh_parms_alloc_default(struct neigh_table *tbl, struct net *net);
 extern void			neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
 extern void			neigh_parms_destroy(struct neigh_parms *parms);
 extern unsigned long		neigh_rand_reach_time(unsigned long base);
Index: net-2.6.25/net/core/neighbour.c
===================================================================
--- net-2.6.25.orig/net/core/neighbour.c
+++ net-2.6.25/net/core/neighbour.c
@@ -1325,6 +1325,20 @@ struct neigh_parms *neigh_parms_alloc(st
 	return p;
 }
 
+struct neigh_parms *neigh_parms_alloc_default(struct neigh_table *tbl,
+						struct net *net)
+{
+	struct neigh_parms *parms;
+	if (net != &init_net) {
+		parms = neigh_parms_alloc(NULL, tbl);
+		release_net(parms->net);
+		parms->net = hold_net(net);
+	}
+	else
+		parms = neigh_parms_clone(&tbl->parms);
+	return parms;
+}
+
 static void neigh_rcu_free_parms(struct rcu_head *head)
 {
 	struct neigh_parms *parms =
@@ -2787,6 +2801,7 @@ EXPORT_SYMBOL(neigh_ifdown);
 EXPORT_SYMBOL(neigh_lookup);
 EXPORT_SYMBOL(neigh_lookup_nodev);
 EXPORT_SYMBOL(neigh_parms_alloc);
+EXPORT_SYMBOL(neigh_parms_alloc_default);
 EXPORT_SYMBOL(neigh_parms_release);
 EXPORT_SYMBOL(neigh_rand_reach_time);
 EXPORT_SYMBOL(neigh_resolve_output);

-- 

^ permalink raw reply

* [patch net-2.6.25 1/2][NETNS] net: Modify the neighbour table code so it handles multiple network namespaces
From: Daniel Lezcano @ 2007-12-21 13:59 UTC (permalink / raw)
  To: davem; +Cc: netdev, den, benjamin.thery, Eric W. Biederman
In-Reply-To: <20071221135922.726347008@ICON-9-164-138-215.megacenter.de.ibm.com>

[-- Attachment #1: 0001-net-Modify-the-neighbour-table-code-so-it-handles-m.patch --]
[-- Type: text/plain, Size: 20414 bytes --]

I'm actually surprised at how much was involved.  At first glance it appears
that the neighbour table data structures are already split by network device
so all that should be needed is to modify the user interface commands
to filter the set of neighbours by the network namespace of their devices.

However a couple things turned up while I was reading through the code.
The proxy neighbour table allows entries with no network device, and
the neighbour parms are per network device (except for the defaults)
so they now need a per network namespace default.

So I updated the two structures (which surprised me) with their very
own network namespace parameter.  Updated the relevant lookup and
destroy routines with a network namespace parameter and modified
the code that interacts with users to filter out neighbour
table entries for devices of other namespaces.

I'm a little concerned that we can modify and display the global
table configuration and from all network namespaces.  But
this appears good enough for now.

I keep thinking modifying the neighbour table to have per network
namespace instances of each table type would should be cleaner.  The
hash table is already dynamically sized so there are it is not a limiter.
The default parameter would be straight forward to take care of.  However
when I look at the how the network table is built and used I still find
some assumptions that there is only a single neighbour table for each
type of table in the kernel.  The netlink operations, neigh_seq_start,
the non-core network users that call neigh_lookup.  So while it might
be doable it would require more refactoring than my current approach
of just doing a little extra filtering in the code.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
---
 include/net/neighbour.h |   10 ++--
 net/atm/clip.c          |   15 +++++-
 net/core/neighbour.c    |  116 +++++++++++++++++++++++++++++-------------------
 net/decnet/dn_neigh.c   |    6 +-
 net/decnet/dn_route.c   |    2 
 net/ipv4/arp.c          |   12 ++--
 net/ipv6/ip6_output.c   |    2 
 net/ipv6/ndisc.c        |    4 -
 8 files changed, 106 insertions(+), 61 deletions(-)

Index: net-2.6.25/include/net/neighbour.h
===================================================================
--- net-2.6.25.orig/include/net/neighbour.h
+++ net-2.6.25/include/net/neighbour.h
@@ -34,6 +34,7 @@ struct neighbour;
 
 struct neigh_parms
 {
+	struct net *net;
 	struct net_device *dev;
 	struct neigh_parms *next;
 	int	(*neigh_setup)(struct neighbour *);
@@ -126,7 +127,8 @@ struct neigh_ops
 struct pneigh_entry
 {
 	struct pneigh_entry	*next;
-	struct net_device		*dev;
+	struct net		*net;
+	struct net_device	*dev;
 	u8			flags;
 	u8			key[0];
 };
@@ -187,6 +189,7 @@ extern struct neighbour *	neigh_lookup(s
 					     const void *pkey,
 					     struct net_device *dev);
 extern struct neighbour *	neigh_lookup_nodev(struct neigh_table *tbl,
+						   struct net *net,
 						   const void *pkey);
 extern struct neighbour *	neigh_create(struct neigh_table *tbl,
 					     const void *pkey,
@@ -211,8 +214,8 @@ extern unsigned long		neigh_rand_reach_t
 
 extern void			pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
 					       struct sk_buff *skb);
-extern struct pneigh_entry	*pneigh_lookup(struct neigh_table *tbl, const void *key, struct net_device *dev, int creat);
-extern int			pneigh_delete(struct neigh_table *tbl, const void *key, struct net_device *dev);
+extern struct pneigh_entry	*pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev, int creat);
+extern int			pneigh_delete(struct neigh_table *tbl, struct net *net, const void *key, struct net_device *dev);
 
 extern void neigh_app_ns(struct neighbour *n);
 extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie);
@@ -220,6 +223,7 @@ extern void __neigh_for_each_release(str
 extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *));
 
 struct neigh_seq_state {
+	struct net *net;
 	struct neigh_table *tbl;
 	void *(*neigh_sub_iter)(struct neigh_seq_state *state,
 				struct neighbour *n, loff_t *pos);
Index: net-2.6.25/net/atm/clip.c
===================================================================
--- net-2.6.25.orig/net/atm/clip.c
+++ net-2.6.25/net/atm/clip.c
@@ -949,6 +949,11 @@ static int arp_seq_open(struct inode *in
 
 	seq = file->private_data;
 	seq->private = state;
+	state->ns.net = get_proc_net(inode);
+	if (!state->ns.net) {
+		seq_release_private(inode, file);
+		rc = -ENXIO;
+	}
 out:
 	return rc;
 
@@ -957,11 +962,19 @@ out_kfree:
 	goto out;
 }
 
+static int arp_seq_release(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq = file->private_data;
+	struct clip_seq_state *state = seq->private;
+	put_net(state->ns.net);
+	return seq_release_private(inode, file);
+}
+
 static const struct file_operations arp_seq_fops = {
 	.open		= arp_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= arp_seq_release,
 	.owner		= THIS_MODULE
 };
 #endif
Index: net-2.6.25/net/core/neighbour.c
===================================================================
--- net-2.6.25.orig/net/core/neighbour.c
+++ net-2.6.25/net/core/neighbour.c
@@ -375,7 +375,8 @@ struct neighbour *neigh_lookup(struct ne
 	return n;
 }
 
-struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
+struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
+				     const void *pkey)
 {
 	struct neighbour *n;
 	int key_len = tbl->key_len;
@@ -385,7 +386,8 @@ struct neighbour *neigh_lookup_nodev(str
 
 	read_lock_bh(&tbl->lock);
 	for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
-		if (!memcmp(n->primary_key, pkey, key_len)) {
+		if (!memcmp(n->primary_key, pkey, key_len) &&
+		    (net == n->dev->nd_net)) {
 			neigh_hold(n);
 			NEIGH_CACHE_STAT_INC(tbl, hits);
 			break;
@@ -463,7 +465,8 @@ out_neigh_release:
 	goto out;
 }
 
-struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, const void *pkey,
+struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
+				    struct net *net, const void *pkey,
 				    struct net_device *dev, int creat)
 {
 	struct pneigh_entry *n;
@@ -479,6 +482,7 @@ struct pneigh_entry * pneigh_lookup(stru
 
 	for (n = tbl->phash_buckets[hash_val]; n; n = n->next) {
 		if (!memcmp(n->key, pkey, key_len) &&
+		    (n->net == net) &&
 		    (n->dev == dev || !n->dev)) {
 			read_unlock_bh(&tbl->lock);
 			goto out;
@@ -495,6 +499,7 @@ struct pneigh_entry * pneigh_lookup(stru
 	if (!n)
 		goto out;
 
+	n->net = hold_net(net);
 	memcpy(n->key, pkey, key_len);
 	n->dev = dev;
 	if (dev)
@@ -517,7 +522,7 @@ out:
 }
 
 
-int pneigh_delete(struct neigh_table *tbl, const void *pkey,
+int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
 		  struct net_device *dev)
 {
 	struct pneigh_entry *n, **np;
@@ -532,13 +537,15 @@ int pneigh_delete(struct neigh_table *tb
 	write_lock_bh(&tbl->lock);
 	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
 	     np = &n->next) {
-		if (!memcmp(n->key, pkey, key_len) && n->dev == dev) {
+		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
+		    (n->net == net)) {
 			*np = n->next;
 			write_unlock_bh(&tbl->lock);
 			if (tbl->pdestructor)
 				tbl->pdestructor(n);
 			if (n->dev)
 				dev_put(n->dev);
+			release_net(n->net);
 			kfree(n);
 			return 0;
 		}
@@ -561,6 +568,7 @@ static int pneigh_ifdown(struct neigh_ta
 					tbl->pdestructor(n);
 				if (n->dev)
 					dev_put(n->dev);
+				release_net(n->net);
 				kfree(n);
 				continue;
 			}
@@ -1261,12 +1269,37 @@ void pneigh_enqueue(struct neigh_table *
 	spin_unlock(&tbl->proxy_queue.lock);
 }
 
+static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
+						      struct net *net, int ifindex)
+{
+	struct neigh_parms *p;
+
+	for (p = &tbl->parms; p; p = p->next) {
+		if (p->net != net)
+			continue;
+		if ((p->dev && p->dev->ifindex == ifindex) ||
+		    (!p->dev && !ifindex))
+			return p;
+	}
+
+	return NULL;
+}
 
 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
 				      struct neigh_table *tbl)
 {
-	struct neigh_parms *p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
+	struct neigh_parms *p, *ref;
+	struct net *net;
+
+	net = &init_net;
+	if (dev)
+		net = dev->nd_net;
+
+	ref = lookup_neigh_params(tbl, net, 0);
+	if (!ref)
+		return NULL;
 
+	p = kmemdup(ref, sizeof(*p), GFP_KERNEL);
 	if (p) {
 		p->tbl		  = tbl;
 		atomic_set(&p->refcnt, 1);
@@ -1282,6 +1315,7 @@ struct neigh_parms *neigh_parms_alloc(st
 			dev_hold(dev);
 			p->dev = dev;
 		}
+		p->net = hold_net(net);
 		p->sysctl_table = NULL;
 		write_lock_bh(&tbl->lock);
 		p->next		= tbl->parms.next;
@@ -1323,6 +1357,7 @@ void neigh_parms_release(struct neigh_ta
 
 void neigh_parms_destroy(struct neigh_parms *parms)
 {
+	release_net(parms->net);
 	kfree(parms);
 }
 
@@ -1333,6 +1368,7 @@ void neigh_table_init_no_netlink(struct 
 	unsigned long now = jiffies;
 	unsigned long phsize;
 
+	tbl->parms.net = &init_net;
 	atomic_set(&tbl->parms.refcnt, 1);
 	INIT_RCU_HEAD(&tbl->parms.rcu_head);
 	tbl->parms.reachable_time =
@@ -1446,9 +1482,6 @@ static int neigh_delete(struct sk_buff *
 	struct net_device *dev = NULL;
 	int err = -EINVAL;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	if (nlmsg_len(nlh) < sizeof(*ndm))
 		goto out;
 
@@ -1477,7 +1510,7 @@ static int neigh_delete(struct sk_buff *
 			goto out_dev_put;
 
 		if (ndm->ndm_flags & NTF_PROXY) {
-			err = pneigh_delete(tbl, nla_data(dst_attr), dev);
+			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
 			goto out_dev_put;
 		}
 
@@ -1515,9 +1548,6 @@ static int neigh_add(struct sk_buff *skb
 	struct net_device *dev = NULL;
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
 	if (err < 0)
 		goto out;
@@ -1557,7 +1587,7 @@ static int neigh_add(struct sk_buff *skb
 			struct pneigh_entry *pn;
 
 			err = -ENOBUFS;
-			pn = pneigh_lookup(tbl, dst, dev, 1);
+			pn = pneigh_lookup(tbl, net, dst, dev, 1);
 			if (pn) {
 				pn->flags = ndm->ndm_flags;
 				err = 0;
@@ -1752,19 +1782,6 @@ errout:
 	return -EMSGSIZE;
 }
 
-static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl,
-						      int ifindex)
-{
-	struct neigh_parms *p;
-
-	for (p = &tbl->parms; p; p = p->next)
-		if ((p->dev && p->dev->ifindex == ifindex) ||
-		    (!p->dev && !ifindex))
-			return p;
-
-	return NULL;
-}
-
 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
 	[NDTA_NAME]		= { .type = NLA_STRING },
 	[NDTA_THRESH1]		= { .type = NLA_U32 },
@@ -1798,9 +1815,6 @@ static int neightbl_set(struct sk_buff *
 	struct nlattr *tb[NDTA_MAX+1];
 	int err;
 
-	if (net != &init_net)
-		return -EINVAL;
-
 	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
 			  nl_neightbl_policy);
 	if (err < 0)
@@ -1845,7 +1859,7 @@ static int neightbl_set(struct sk_buff *
 		if (tbp[NDTPA_IFINDEX])
 			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
 
-		p = lookup_neigh_params(tbl, ifindex);
+		p = lookup_neigh_params(tbl, net, ifindex);
 		if (p == NULL) {
 			err = -ENOENT;
 			goto errout_tbl_lock;
@@ -1926,9 +1940,6 @@ static int neightbl_dump_info(struct sk_
 	int neigh_skip = cb->args[1];
 	struct neigh_table *tbl;
 
-	if (net != &init_net)
-		return 0;
-
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 
 	read_lock(&neigh_tbl_lock);
@@ -1943,8 +1954,11 @@ static int neightbl_dump_info(struct sk_
 				       NLM_F_MULTI) <= 0)
 			break;
 
-		for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) {
-			if (nidx < neigh_skip)
+		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
+			if (net != p->net)
+				continue;
+
+			if (nidx++ < neigh_skip)
 				continue;
 
 			if (neightbl_fill_param_info(skb, tbl, p,
@@ -2020,6 +2034,7 @@ static void neigh_update_notify(struct n
 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
 			    struct netlink_callback *cb)
 {
+	struct net * net = skb->sk->sk_net;
 	struct neighbour *n;
 	int rc, h, s_h = cb->args[1];
 	int idx, s_idx = idx = cb->args[2];
@@ -2030,8 +2045,12 @@ static int neigh_dump_table(struct neigh
 			continue;
 		if (h > s_h)
 			s_idx = 0;
-		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
-			if (idx < s_idx)
+		for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) {
+			int lidx;
+			if (n->dev->nd_net != net)
+				continue;
+			lidx = idx++;
+			if (lidx < s_idx)
 				continue;
 			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid,
 					    cb->nlh->nlmsg_seq,
@@ -2053,13 +2072,9 @@ out:
 
 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
 {
-	struct net *net = skb->sk->sk_net;
 	struct neigh_table *tbl;
 	int t, family, s_t;
 
-	if (net != &init_net)
-		return 0;
-
 	read_lock(&neigh_tbl_lock);
 	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
 	s_t = cb->args[0];
@@ -2127,6 +2142,7 @@ EXPORT_SYMBOL(__neigh_for_each_release);
 static struct neighbour *neigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net *net = state->net;
 	struct neigh_table *tbl = state->tbl;
 	struct neighbour *n = NULL;
 	int bucket = state->bucket;
@@ -2136,6 +2152,8 @@ static struct neighbour *neigh_get_first
 		n = tbl->hash_buckets[bucket];
 
 		while (n) {
+			if (n->dev->nd_net != net)
+				goto next;
 			if (state->neigh_sub_iter) {
 				loff_t fakep = 0;
 				void *v;
@@ -2165,6 +2183,7 @@ static struct neighbour *neigh_get_next(
 					loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net *net = state->net;
 	struct neigh_table *tbl = state->tbl;
 
 	if (state->neigh_sub_iter) {
@@ -2176,6 +2195,8 @@ static struct neighbour *neigh_get_next(
 
 	while (1) {
 		while (n) {
+			if (n->dev->nd_net != net)
+				goto next;
 			if (state->neigh_sub_iter) {
 				void *v = state->neigh_sub_iter(state, n, pos);
 				if (v)
@@ -2222,6 +2243,7 @@ static struct neighbour *neigh_get_idx(s
 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net * net = state->net;
 	struct neigh_table *tbl = state->tbl;
 	struct pneigh_entry *pn = NULL;
 	int bucket = state->bucket;
@@ -2229,6 +2251,8 @@ static struct pneigh_entry *pneigh_get_f
 	state->flags |= NEIGH_SEQ_IS_PNEIGH;
 	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
 		pn = tbl->phash_buckets[bucket];
+		while (pn && (pn->net != net))
+			pn = pn->next;
 		if (pn)
 			break;
 	}
@@ -2242,6 +2266,7 @@ static struct pneigh_entry *pneigh_get_n
 					    loff_t *pos)
 {
 	struct neigh_seq_state *state = seq->private;
+	struct net * net = state->net;
 	struct neigh_table *tbl = state->tbl;
 
 	pn = pn->next;
@@ -2249,6 +2274,8 @@ static struct pneigh_entry *pneigh_get_n
 		if (++state->bucket > PNEIGH_HASHMASK)
 			break;
 		pn = tbl->phash_buckets[state->bucket];
+		while (pn && (pn->net != net))
+			pn = pn->next;
 		if (pn)
 			break;
 	}
@@ -2450,6 +2477,7 @@ static inline size_t neigh_nlmsg_size(vo
 
 static void __neigh_notify(struct neighbour *n, int type, int flags)
 {
+	struct net *net = n->dev->nd_net;
 	struct sk_buff *skb;
 	int err = -ENOBUFS;
 
@@ -2464,10 +2492,10 @@ static void __neigh_notify(struct neighb
 		kfree_skb(skb);
 		goto errout;
 	}
-	err = rtnl_notify(skb, &init_net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
+	err = rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
 errout:
 	if (err < 0)
-		rtnl_set_sk_err(&init_net, RTNLGRP_NEIGH, err);
+		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
 }
 
 #ifdef CONFIG_ARPD
Index: net-2.6.25/net/decnet/dn_neigh.c
===================================================================
--- net-2.6.25.orig/net/decnet/dn_neigh.c
+++ net-2.6.25/net/decnet/dn_neigh.c
@@ -580,8 +580,8 @@ static const struct seq_operations dn_ne
 
 static int dn_neigh_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &dn_neigh_seq_ops,
-			sizeof(struct neigh_seq_state));
+	return seq_open_net(inode, file, &dn_neigh_seq_ops,
+			    sizeof(struct neigh_seq_state));
 }
 
 static const struct file_operations dn_neigh_seq_fops = {
@@ -589,7 +589,7 @@ static const struct file_operations dn_n
 	.open		= dn_neigh_seq_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
 #endif
Index: net-2.6.25/net/decnet/dn_route.c
===================================================================
--- net-2.6.25.orig/net/decnet/dn_route.c
+++ net-2.6.25/net/decnet/dn_route.c
@@ -984,7 +984,7 @@ source_ok:
 		 * here
 		 */
 		if (!try_hard) {
-			neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst);
+			neigh = neigh_lookup_nodev(&dn_neigh_table, &init_net, &fl.fld_dst);
 			if (neigh) {
 				if ((oldflp->oif &&
 				    (neigh->dev->ifindex != oldflp->oif)) ||
Index: net-2.6.25/net/ipv4/arp.c
===================================================================
--- net-2.6.25.orig/net/ipv4/arp.c
+++ net-2.6.25/net/ipv4/arp.c
@@ -838,7 +838,7 @@ static int arp_process(struct sk_buff *s
 		} else if (IN_DEV_FORWARD(in_dev)) {
 			if ((rt->rt_flags&RTCF_DNAT) ||
 			    (addr_type == RTN_UNICAST  && rt->u.dst.dev != dev &&
-			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &tip, dev, 0)))) {
+			     (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, &init_net, &tip, dev, 0)))) {
 				n = neigh_event_ns(&arp_tbl, sha, &sip, dev);
 				if (n)
 					neigh_release(n);
@@ -981,7 +981,7 @@ static int arp_req_set_public(struct net
 			return -ENODEV;
 	}
 	if (mask) {
-		if (pneigh_lookup(&arp_tbl, &ip, dev, 1) == NULL)
+		if (pneigh_lookup(&arp_tbl, &init_net, &ip, dev, 1) == NULL)
 			return -ENOBUFS;
 		return 0;
 	}
@@ -1090,7 +1090,7 @@ static int arp_req_delete_public(struct 
 	__be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
 
 	if (mask == htonl(0xFFFFFFFF))
-		return pneigh_delete(&arp_tbl, &ip, dev);
+		return pneigh_delete(&arp_tbl, &init_net, &ip, dev);
 
 	if (mask)
 		return -EINVAL;
@@ -1376,8 +1376,8 @@ static const struct seq_operations arp_s
 
 static int arp_seq_open(struct inode *inode, struct file *file)
 {
-	return seq_open_private(file, &arp_seq_ops,
-			sizeof(struct neigh_seq_state));
+	return seq_open_net(inode, file, &arp_seq_ops,
+			    sizeof(struct neigh_seq_state));
 }
 
 static const struct file_operations arp_seq_fops = {
@@ -1385,7 +1385,7 @@ static const struct file_operations arp_
 	.open           = arp_seq_open,
 	.read           = seq_read,
 	.llseek         = seq_lseek,
-	.release	= seq_release_private,
+	.release	= seq_release_net,
 };
 
 static int __init arp_proc_init(void)
Index: net-2.6.25/net/ipv6/ip6_output.c
===================================================================
--- net-2.6.25.orig/net/ipv6/ip6_output.c
+++ net-2.6.25/net/ipv6/ip6_output.c
@@ -449,7 +449,7 @@ int ip6_forward(struct sk_buff *skb)
 
 	/* XXX: idev->cnf.proxy_ndp? */
 	if (ipv6_devconf.proxy_ndp &&
-	    pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
+	    pneigh_lookup(&nd_tbl, &init_net, &hdr->daddr, skb->dev, 0)) {
 		int proxied = ip6_forward_proxy_check(skb);
 		if (proxied > 0)
 			return ip6_input(skb);
Index: net-2.6.25/net/ipv6/ndisc.c
===================================================================
--- net-2.6.25.orig/net/ipv6/ndisc.c
+++ net-2.6.25/net/ipv6/ndisc.c
@@ -789,7 +789,7 @@ static void ndisc_recv_ns(struct sk_buff
 		if (ipv6_chk_acast_addr(dev, &msg->target) ||
 		    (idev->cnf.forwarding &&
 		     (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
-		     (pneigh = pneigh_lookup(&nd_tbl,
+		     (pneigh = pneigh_lookup(&nd_tbl, &init_net,
 					     &msg->target, dev, 0)) != NULL)) {
 			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
 			    skb->pkt_type != PACKET_HOST &&
@@ -930,7 +930,7 @@ static void ndisc_recv_na(struct sk_buff
 		 */
 		if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
 		    ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
-		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+		    pneigh_lookup(&nd_tbl, &init_net, &msg->target, dev, 0)) {
 			/* XXX: idev->cnf.prixy_ndp */
 			goto out;
 		}

-- 

^ permalink raw reply

* [SOCK] Avoid divides in sk_stream_pages() and __sk_stream_mem_reclaim()
From: Eric Dumazet @ 2007-12-21 14:45 UTC (permalink / raw)
  To: David Miller; +Cc: netdev@vger.kernel.org

Hi David

This is the last one of this boring patch suite.

I prefered to not change sk_forward_alloc type, I prefer to wait 
18 months more before taking this responsibility :)

Merry Christmas everybody

Eric

[SOCK] Avoid divides in sk_stream_pages() and __sk_stream_mem_reclaim()

sk_forward_alloc being signed, we should take care of divides by
SK_STREAM_MEM_QUANTUM we do in sk_stream_pages() and 
__sk_stream_mem_reclaim()

This patchs introduces SK_STREAM_MEM_QUANTUM_SHIFT, defined
as ilog2(SK_STREAM_MEM_QUANTUM), to be able to use right
shifts instead of plain divides.

This should help compiler to choose right shifts instead of
expensive divides (as seen with CONFIG_CC_OPTIMIZE_FOR_SIZE=y on x86)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>

diff --git a/include/net/sock.h b/include/net/sock.h
index 4456453..a134be1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -716,10 +716,11 @@ extern void __sk_stream_mem_reclaim(struct sock *sk);
 extern int sk_stream_mem_schedule(struct sock *sk, int size, int kind);
 
 #define SK_STREAM_MEM_QUANTUM ((int)PAGE_SIZE)
+#define SK_STREAM_MEM_QUANTUM_SHIFT ilog2(SK_STREAM_MEM_QUANTUM)
 
 static inline int sk_stream_pages(int amt)
 {
-	return DIV_ROUND_UP(amt, SK_STREAM_MEM_QUANTUM);
+	return (amt + SK_STREAM_MEM_QUANTUM - 1) >> SK_STREAM_MEM_QUANTUM_SHIFT;
 }
 
 static inline void sk_stream_mem_reclaim(struct sock *sk)
diff --git a/net/core/stream.c b/net/core/stream.c
index 5586879..bf188ff 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -196,7 +196,7 @@ EXPORT_SYMBOL(sk_stream_error);
 
 void __sk_stream_mem_reclaim(struct sock *sk)
 {
-	atomic_sub(sk->sk_forward_alloc / SK_STREAM_MEM_QUANTUM,
+	atomic_sub(sk->sk_forward_alloc >> SK_STREAM_MEM_QUANTUM_SHIFT,
 		   sk->sk_prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_STREAM_MEM_QUANTUM - 1;
 	if (*sk->sk_prot->memory_pressure &&

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox