Netdev List
 help / color / mirror / Atom feed
* [PATCH v2 03/10] net/fec: add mac field into platform data and consolidate fec_get_mac
From: Shawn Guo @ 2011-01-04  9:24 UTC (permalink / raw)
  To: davem, gerg, baruch, eric, bryan.wu, r64343, B32542,
	u.kleine-koenig
In-Reply-To: <1294133056-21195-1-git-send-email-shawn.guo@freescale.com>

Add a mac field to fec_platform_data and consolidate the function
fec_get_mac so that it gets the MAC address in the following order.

 1) kernel command line fec_mac=xx:xx:xx...
 2) from flash in case of CONFIG_M5272 or fec_platform_data mac
    field for others, which typically have mac stored in fuse
 3) fec mac address registers set by bootloader

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 drivers/net/fec.c   |   90 ++++++++++++++++++++++++++++----------------------
 include/linux/fec.h |    2 +
 2 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 47f6b3b..cd59814 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -59,15 +59,9 @@
 #define FEC_ALIGNMENT	0x3
 #endif
 
-/*
- * Define the fixed address of the FEC hardware.
- */
-#if defined(CONFIG_M5272)
-
-static unsigned char	fec_mac_default[] = {
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-};
+static unsigned char fec_mac_default[ETH_ALEN];
 
+#if defined(CONFIG_M5272)
 /*
  * Some hardware gets it MAC address out of local flash memory.
  * if this is non-zero then assume it is the address to get MAC from.
@@ -537,27 +531,40 @@ rx_processing_done:
 }
 
 /* ------------------------------------------------------------------------- */
-#ifdef CONFIG_M5272
 static void __inline__ fec_get_mac(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
+	struct fec_platform_data *pdata = fep->pdev->dev.platform_data;
 	unsigned char *iap, tmpaddr[ETH_ALEN];
 
-	if (FEC_FLASHMAC) {
-		/*
-		 * Get MAC address from FLASH.
-		 * If it is all 1's or 0's, use the default.
-		 */
-		iap = (unsigned char *)FEC_FLASHMAC;
-		if ((iap[0] == 0) && (iap[1] == 0) && (iap[2] == 0) &&
-		    (iap[3] == 0) && (iap[4] == 0) && (iap[5] == 0))
-			iap = fec_mac_default;
-		if ((iap[0] == 0xff) && (iap[1] == 0xff) && (iap[2] == 0xff) &&
-		    (iap[3] == 0xff) && (iap[4] == 0xff) && (iap[5] == 0xff))
-			iap = fec_mac_default;
-	} else {
-		*((unsigned long *) &tmpaddr[0]) = readl(fep->hwp + FEC_ADDR_LOW);
-		*((unsigned short *) &tmpaddr[4]) = (readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
+	/*
+	 * try to get mac address in following order:
+	 *
+	 * 1) kernel command line fec_mac=xx:xx:xx...
+	 */
+	iap = fec_mac_default;
+
+	/*
+	 * 2) from flash or fuse (via platform data)
+	 */
+	if (!is_valid_ether_addr(iap)) {
+#ifdef CONFIG_M5272
+		if (FEC_FLASHMAC)
+			iap = (unsigned char *)FEC_FLASHMAC;
+#else
+		if (pdata)
+			memcpy(iap, pdata->mac, ETH_ALEN);
+#endif
+	}
+
+	/*
+	 * 3) FEC mac registers set by bootloader
+	 */
+	if (!is_valid_ether_addr(iap)) {
+		*((unsigned long *) &tmpaddr[0]) =
+			be32_to_cpu(readl(fep->hwp + FEC_ADDR_LOW));
+		*((unsigned short *) &tmpaddr[4]) =
+			be16_to_cpu(readl(fep->hwp + FEC_ADDR_HIGH) >> 16);
 		iap = &tmpaddr[0];
 	}
 
@@ -567,7 +574,6 @@ static void __inline__ fec_get_mac(struct net_device *dev)
 	if (iap == fec_mac_default)
 		 dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->pdev->id;
 }
-#endif
 
 /* ------------------------------------------------------------------------- */
 
@@ -1063,6 +1069,24 @@ static const struct net_device_ops fec_netdev_ops = {
 	.ndo_do_ioctl           = fec_enet_ioctl,
 };
 
+static int __init fec_mac_addr_setup(char *mac_addr)
+{
+	int i;
+	unsigned int tmp;
+
+	for (i = 0; i < ETH_ALEN; i++) {
+		if (sscanf(mac_addr + 3*i, "%2x", &tmp) != 1) {
+			printk(KERN_WARNING "Malformed fec mac address\n");
+			return 0;
+		}
+		fec_mac_default[i] = tmp;
+	}
+
+	return 1;
+}
+
+__setup("fec_mac=", fec_mac_addr_setup);
+
  /*
   * XXX:  We need to clean up on failure exits here.
   *
@@ -1087,22 +1111,8 @@ static int fec_enet_init(struct net_device *dev)
 	fep->hwp = (void __iomem *)dev->base_addr;
 	fep->netdev = dev;
 
-	/* Set the Ethernet address */
-#ifdef CONFIG_M5272
+	/* Get the Ethernet address */
 	fec_get_mac(dev);
-#else
-	{
-		unsigned long l;
-		l = readl(fep->hwp + FEC_ADDR_LOW);
-		dev->dev_addr[0] = (unsigned char)((l & 0xFF000000) >> 24);
-		dev->dev_addr[1] = (unsigned char)((l & 0x00FF0000) >> 16);
-		dev->dev_addr[2] = (unsigned char)((l & 0x0000FF00) >> 8);
-		dev->dev_addr[3] = (unsigned char)((l & 0x000000FF) >> 0);
-		l = readl(fep->hwp + FEC_ADDR_HIGH);
-		dev->dev_addr[4] = (unsigned char)((l & 0xFF000000) >> 24);
-		dev->dev_addr[5] = (unsigned char)((l & 0x00FF0000) >> 16);
-	}
-#endif
 
 	/* Set receive and transmit descriptor base. */
 	fep->rx_bd_base = cbd_base;
diff --git a/include/linux/fec.h b/include/linux/fec.h
index 5d3523d..bf0c69f 100644
--- a/include/linux/fec.h
+++ b/include/linux/fec.h
@@ -1,6 +1,7 @@
 /* include/linux/fec.h
  *
  * Copyright (c) 2009 Orex Computed Radiography
+ * Copyright (C) 2010 Freescale Semiconductor, Inc.
  *   Baruch Siach <baruch@tkos.co.il>
  *
  * Header file for the FEC platform data
@@ -16,6 +17,7 @@
 
 struct fec_platform_data {
 	phy_interface_t phy;
+	unsigned char mac[ETH_ALEN];
 };
 
 #endif
-- 
1.7.1



^ permalink raw reply related

* [PATCH v2 02/10] net/fec: remove the use of "index" which is legacy
From: Shawn Guo @ 2011-01-04  9:24 UTC (permalink / raw)
  To: davem, gerg, baruch, eric, bryan.wu, r64343, B32542,
	u.kleine-koenig
In-Reply-To: <1294133056-21195-1-git-send-email-shawn.guo@freescale.com>

The "index" field has become legacy now that fep->pdev->id works
to identify the instance.

Moreover, the call of fec_enet_init(ndev, 0) always passes 0
to fep->index. This makes the following code in fec_get_mac buggy.

	/* Adjust MAC if using default MAC address */
	if (iap == fec_mac_default)
		dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->index;

It is probably time to remove "index" and use fep->pdev->id instead.

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 drivers/net/fec.c |    9 +++------
 1 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 52e9ca8..47f6b3b 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -186,7 +186,6 @@ struct fec_enet_private {
 	int     mii_timeout;
 	uint    phy_speed;
 	phy_interface_t	phy_interface;
-	int	index;
 	int	link;
 	int	full_duplex;
 	struct	completion mdio_done;
@@ -566,7 +565,7 @@ static void __inline__ fec_get_mac(struct net_device *dev)
 
 	/* Adjust MAC if using default MAC address */
 	if (iap == fec_mac_default)
-		 dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->index;
+		 dev->dev_addr[ETH_ALEN-1] = fec_mac_default[ETH_ALEN-1] + fep->pdev->id;
 }
 #endif
 
@@ -1067,9 +1066,8 @@ static const struct net_device_ops fec_netdev_ops = {
  /*
   * XXX:  We need to clean up on failure exits here.
   *
-  * index is only used in legacy code
   */
-static int fec_enet_init(struct net_device *dev, int index)
+static int fec_enet_init(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
 	struct bufdesc *cbd_base;
@@ -1086,7 +1084,6 @@ static int fec_enet_init(struct net_device *dev, int index)
 
 	spin_lock_init(&fep->hw_lock);
 
-	fep->index = index;
 	fep->hwp = (void __iomem *)dev->base_addr;
 	fep->netdev = dev;
 
@@ -1316,7 +1313,7 @@ fec_probe(struct platform_device *pdev)
 	}
 	clk_enable(fep->clk);
 
-	ret = fec_enet_init(ndev, 0);
+	ret = fec_enet_init(ndev);
 	if (ret)
 		goto failed_init;
 
-- 
1.7.1



^ permalink raw reply related

* [PATCH v2 01/10] net/fec: fix MMFR_OP type in fec_enet_mdio_write
From: Shawn Guo @ 2011-01-04  9:24 UTC (permalink / raw)
  To: davem, gerg, baruch, eric, bryan.wu, r64343, B32542,
	u.kleine-koenig
In-Reply-To: <1294133056-21195-1-git-send-email-shawn.guo@freescale.com>

FEC_MMFR_OP_WRITE should be used rather than FEC_MMFR_OP_READ in
an mdio write operation.

It's probably a typo introduced by commit:

e6b043d512fa8d9a3801bf5d72bfa3b8fc3b3cc8
netdev/fec.c: add phylib supporting to enable carrier detection (v2)

Signed-off-by: Shawn Guo <shawn.guo@freescale.com>
---
 drivers/net/fec.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index cce32d4..52e9ca8 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -651,8 +651,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	fep->mii_timeout = 0;
 	init_completion(&fep->mdio_done);
 
-	/* start a read op */
-	writel(FEC_MMFR_ST | FEC_MMFR_OP_READ |
+	/* start a write op */
+	writel(FEC_MMFR_ST | FEC_MMFR_OP_WRITE |
 		FEC_MMFR_PA(mii_id) | FEC_MMFR_RA(regnum) |
 		FEC_MMFR_TA | FEC_MMFR_DATA(value),
 		fep->hwp + FEC_MII_DATA);
-- 
1.7.1



^ permalink raw reply related

* Re: 'tcp: bind() fix when many ports are bound' problem
From: Gaspar Chilingarov @ 2011-01-04  9:12 UTC (permalink / raw)
  To: Daniel Baluta; +Cc: netdev, eric.dumazet
In-Reply-To: <AANLkTimrpZOWEc5qXPEbrJbt2Nmh2vY69aC1Dwf1cCPH@mail.gmail.com>

Hi there!

Well, that looks strange.

On my side I've just put in a workaround (manually binding to all ports
in sequence :)
and moved the production code to FreeBSD, as it has a more scalable network stack.

I can see the potential problem with that bind() problem on highly
loaded DNS servers/resolvers which establish tons of outgoing UDP
connections.

In some cases those connections could fail, and since not receiving an
answer is a normal condition for DNS, this will go totally unnoticed.

I don't think anyone will hit this bug in a production environment
except for very high-load applications.

/Gaspar

2011/1/4 Daniel Baluta <daniel.baluta@gmail.com>:
> Hi,
>
> After a series of discussions [1], Eric provided
> "tcp: bind() fix when many ports are bound" patch. ([2])
>
> Anyhow, due to this problem ([3]) it was reverted.
> Where there any follow ups on this patch?
>
> I have spent some time looking at inet_csk_get_port with the
> only conclusion that it's scary :D.
>
> Should I work around patch "tcp: bind() fix when many ports are bound",
> and try to fix problem [3], or is that a dead end?
>
> thanks,
> Daniel.
>
> [1] http://kerneltrap.org/mailarchive/linux-netdev/2010/4/20/6275120
> [2] http://kerneltrap.org/mailarchive/git-commits-head/2010/4/24/32191
> [3] http://kerneltrap.org/mailarchive/linux-kernel/2010/4/28/4563937
>



-- 
Gaspar Chilingarov

tel +37493 419763 (mobile - leave voice mail message)
icq 63174784
skype://gasparch
e mailto:nm@web.am mailto:gasparch@gmail.com
w http://gasparchilingarov.com/

^ permalink raw reply

* 'tcp: bind() fix when many ports are bound' problem
From: Daniel Baluta @ 2011-01-04  8:53 UTC (permalink / raw)
  To: netdev; +Cc: eric.dumazet, gasparch

Hi,

After a series of discussions [1], Eric provided
"tcp: bind() fix when many ports are bound" patch. ([2])

Anyhow, due to this problem ([3]) it was reverted.
Were there any follow-ups on this patch?

I have spent some time looking at inet_csk_get_port with the
only conclusion that it's scary :D.

Should I work around patch "tcp: bind() fix when many ports are bound",
and try to fix problem [3], or is that a dead end?

thanks,
Daniel.

[1] http://kerneltrap.org/mailarchive/linux-netdev/2010/4/20/6275120
[2] http://kerneltrap.org/mailarchive/git-commits-head/2010/4/24/32191
[3] http://kerneltrap.org/mailarchive/linux-kernel/2010/4/28/4563937

^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Eric Dumazet @ 2011-01-04  8:40 UTC (permalink / raw)
  To: Changli Gao; +Cc: Joe Perches, Joel Sing, netdev
In-Reply-To: <1294130315.2711.48.camel@edumazet-laptop>

Le mardi 04 janvier 2011 à 09:38 +0100, Eric Dumazet a écrit :

> This could be properly done using another macro in include/net/ip_fib.h
> to centralize this ternary op in one point :
> 
> #define __FIB_RES_PREFSRC(res, default) ((res).fi->fib_prefsrc ? : default)
> #define FIB_RES_PREFSRC(res) __FIB_RES_PREFSRC(res, default, __fib_res_prefsrc(&res)

I meant

#define FIB_RES_PREFSRC(res) __FIB_RES_PREFSRC(res, __fib_res_prefsrc(&res))




^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Eric Dumazet @ 2011-01-04  8:38 UTC (permalink / raw)
  To: Changli Gao; +Cc: Joe Perches, Joel Sing, netdev
In-Reply-To: <AANLkTimDms0FTmXWf9WSpcxwZXVVSXQj44jZ2Ag8Osuk@mail.gmail.com>

Le mardi 04 janvier 2011 à 16:07 +0800, Changli Gao a écrit :
> On Tue, Jan 4, 2011 at 3:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> > Le mardi 04 janvier 2011 à 08:24 +0100, Eric Dumazet a écrit :
> >
> >> Please use FIB_RES_PREFSRC(res)
> >>
> >
> > Hmm no, this is not applicable, but this could be shorter :
> >
> > fl.fl4_src = res.fi->fib_prefsrc ? : fl.fl4_dst;
> >
> >
> 
> I think Joe may object the use of "? :"
> 

Ternary operator is standard C idiom, used in networking stuff, for
example in FIB_RES_PREFSRC() ;)

This could be properly done using another macro in include/net/ip_fib.h
to centralize this ternary op in one point :

#define __FIB_RES_PREFSRC(res, default) ((res).fi->fib_prefsrc ? : default)
#define FIB_RES_PREFSRC(res) __FIB_RES_PREFSRC(res, default, __fib_res_prefsrc(&res))




^ permalink raw reply

* Re: bridge not routing packets via source bridgeport
From: Sebastian J. Bronner @ 2011-01-04  8:25 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, Daniel Kraft
In-Reply-To: <1294080135.2892.1152.camel@edumazet-laptop>

On 03.01.2011 19:42, Eric Dumazet wrote:
> Le lundi 03 janvier 2011 à 18:52 +0100, Sebastian J. Bronner a écrit :
>> we recently upgraded from 2.6.32.25 to 2.6.35.24 and discovered that our
>> virtual machines can no longer access their own external IP addresses.
>> Testing revealed that 2.6.34 was the last version not to have the
>> problem. 2.6.36 still had it. But on to the details.

[ Details snipped ]

> random guess: maybe rp_filter hits you ?
> 
> 
> With 2.6.36, a new SNMP counter was added, 
> "netstat -s | grep IPReversePathFilter"

Thanks for your guess, Eric.

Unfortunately, that didn't change anything.

According to the documentation at

http://lartc.org/howto/lartc.kernel.html

that parameter is to prevent _responses_ leaving through a different
interface than the one the original packet came in on. So, I wouldn't
expect it to have any impact on _routed packets_.

In my original e-mail, I forgot to mention something I noticed while
debugging:

If I put the bridge into promiscuous mode, then suddenly the packets are
routed.

# ip link set promisc on dev virbr1

Cheers,
Sebastian
-- 
*Sebastian J. Bronner*
Administrator

D9T GmbH - Magirusstr. 39/1 - D-89077 Ulm
Tel: +49 731 1411 696-0 - Fax: +49 731 3799-220

Geschäftsführer: Daniel Kraft
Sitz und Register: Ulm, HRB 722416
Ust.IdNr: DE 260484638

http://d9t.de - D9T High Performance Hosting
info@d9t.de

^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Changli Gao @ 2011-01-04  8:07 UTC (permalink / raw)
  To: Eric Dumazet, Joe Perches; +Cc: Joel Sing, netdev
In-Reply-To: <1294126407.2711.39.camel@edumazet-laptop>

On Tue, Jan 4, 2011 at 3:33 PM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
> Le mardi 04 janvier 2011 à 08:24 +0100, Eric Dumazet a écrit :
>
>> Please use FIB_RES_PREFSRC(res)
>>
>
> Hmm no, this is not applicable, but this could be shorter :
>
> fl.fl4_src = res.fi->fib_prefsrc ? : fl.fl4_dst;
>
>

I think Joe may object to the use of "? :"

-- 
Regards,
Changli Gao(xiaosuo@gmail.com)

^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Eric Dumazet @ 2011-01-04  7:33 UTC (permalink / raw)
  To: Joel Sing; +Cc: netdev
In-Reply-To: <1294125880.2711.34.camel@edumazet-laptop>

Le mardi 04 janvier 2011 à 08:24 +0100, Eric Dumazet a écrit :

> Please use FIB_RES_PREFSRC(res)
> 

Hmm no, this is not applicable, but this could be shorter :

fl.fl4_src = res.fi->fib_prefsrc ? : fl.fl4_dst;




^ permalink raw reply

* Re: [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Eric Dumazet @ 2011-01-04  7:24 UTC (permalink / raw)
  To: Joel Sing; +Cc: netdev
In-Reply-To: <1294122260-13245-1-git-send-email-jsing@google.com>

Le mardi 04 janvier 2011 à 17:24 +1100, Joel Sing a écrit :
> The preferred source address is currently ignored for local routes,
> which results in all local connections having a src address that is the
> same as the local dst address. Fix this by respecting the preferred source
> address when it is provided for local routes.
> 
> This bug can be demonstrated as follows:
> 
>  # ifconfig dummy0 192.168.0.1
>  # ip route show table local | grep local.*dummy0
>  local 192.168.0.1 dev dummy0  proto kernel  scope host  src 192.168.0.1
>  # ip route change table local local 192.168.0.1 dev dummy0 \
>      proto kernel scope host src 127.0.0.1
>  # ip route show table local | grep local.*dummy0
>  local 192.168.0.1 dev dummy0  proto kernel  scope host  src 127.0.0.1
> 
> We now establish a local connection and verify the source IP
> address selection:
> 
>  # nc -l 192.168.0.1 3128 &
>  # nc 192.168.0.1 3128 &
>  # netstat -ant | grep 192.168.0.1:3128.*EST
>  tcp        0      0 192.168.0.1:3128        192.168.0.1:33228 ESTABLISHED
>  tcp        0      0 192.168.0.1:33228       192.168.0.1:3128  ESTABLISHED
> 
> Signed-off-by: Joel Sing <jsing@google.com>
> ---
>  net/ipv4/route.c |    8 ++++++--
>  1 files changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/net/ipv4/route.c b/net/ipv4/route.c
> index df948b0..93bfd95 100644
> --- a/net/ipv4/route.c
> +++ b/net/ipv4/route.c
> @@ -2649,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
>  	}
>  
>  	if (res.type == RTN_LOCAL) {
> -		if (!fl.fl4_src)
> -			fl.fl4_src = fl.fl4_dst;
> +		if (!fl.fl4_src) {
> +			if (res.fi->fib_prefsrc)
> +				fl.fl4_src = res.fi->fib_prefsrc;
> +			else
> +				fl.fl4_src = fl.fl4_dst;
> +		}
>  		dev_out = net->loopback_dev;
>  		fl.oif = dev_out->ifindex;
>  		res.fi = NULL;

Please use FIB_RES_PREFSRC(res)

as we do a few lines after ;)

Thanks



^ permalink raw reply

* Re: [net-next-2.6 PATCH v4 2/2] net_sched: implement a root container qdisc sch_mclass
From: Eric Dumazet @ 2011-01-04  6:46 UTC (permalink / raw)
  To: John Fastabend
  Cc: davem, jarkao2, hadi, shemminger, tgraf, bhutchings, nhorman,
	netdev
In-Reply-To: <20110104030558.13187.27076.stgit@jf-dev1-dcblab>

Le lundi 03 janvier 2011 à 19:05 -0800, John Fastabend a écrit :
> This implements a mqprio queueing discipline that by default creates
> a pfifo_fast qdisc per tx queue and provides the needed configuration
> interface.


> +static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
> +{
> +	struct net_device *dev = qdisc_dev(sch);
> +	struct mqprio_sched *priv = qdisc_priv(sch);
> +	unsigned char *b = skb_tail_pointer(skb);
> +	struct tc_mqprio_qopt opt;
> +	struct Qdisc *qdisc;
> +	unsigned int i;
> +
> +	sch->q.qlen = 0;
> +	memset(&sch->bstats, 0, sizeof(sch->bstats));
> +	memset(&sch->qstats, 0, sizeof(sch->qstats));
> +
> +	for (i = 0; i < dev->num_tx_queues; i++) {
> +		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
> +		spin_lock_bh(qdisc_lock(qdisc));
> +		sch->q.qlen		+= qdisc->q.qlen;
> +		sch->bstats.bytes	+= qdisc->bstats.bytes;
> +		sch->bstats.packets	+= qdisc->bstats.packets;
> +		sch->qstats.qlen	+= qdisc->qstats.qlen;
> +		sch->qstats.backlog	+= qdisc->qstats.backlog;
> +		sch->qstats.drops	+= qdisc->qstats.drops;
> +		sch->qstats.requeues	+= qdisc->qstats.requeues;
> +		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
> +		spin_unlock_bh(qdisc_lock(qdisc));
> +	}
> +

I understand this code already exists in mq, I just want to note that
some qdiscs update their stats in their dump() subroutine, because their
enqueue()/dequeue() doesn't update all fields.

We might add a gather_stats() method, eventually, to get rid of all
oddities we currently have with 0 backlogs (or qlen) here and there ;)

For example, I am not even sure qdisc->qstats.qlen should not be
replaced by qdisc->q.qlen in your loop, as done in
mqprio_dump_class_stats()

Reviewed-by: Eric Dumazet <eric.dumazet@gmail.com>



^ permalink raw reply

* [PATCH] ipv4/route.c: respect prefsrc for local routes
From: Joel Sing @ 2011-01-04  6:24 UTC (permalink / raw)
  To: netdev; +Cc: Joel Sing

The preferred source address is currently ignored for local routes,
which results in all local connections having a src address that is the
same as the local dst address. Fix this by respecting the preferred source
address when it is provided for local routes.

This bug can be demonstrated as follows:

 # ifconfig dummy0 192.168.0.1
 # ip route show table local | grep local.*dummy0
 local 192.168.0.1 dev dummy0  proto kernel  scope host  src 192.168.0.1
 # ip route change table local local 192.168.0.1 dev dummy0 \
     proto kernel scope host src 127.0.0.1
 # ip route show table local | grep local.*dummy0
 local 192.168.0.1 dev dummy0  proto kernel  scope host  src 127.0.0.1

We now establish a local connection and verify the source IP
address selection:

 # nc -l 192.168.0.1 3128 &
 # nc 192.168.0.1 3128 &
 # netstat -ant | grep 192.168.0.1:3128.*EST
 tcp        0      0 192.168.0.1:3128        192.168.0.1:33228 ESTABLISHED
 tcp        0      0 192.168.0.1:33228       192.168.0.1:3128  ESTABLISHED

Signed-off-by: Joel Sing <jsing@google.com>
---
 net/ipv4/route.c |    8 ++++++--
 1 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index df948b0..93bfd95 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2649,8 +2649,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp,
 	}
 
 	if (res.type == RTN_LOCAL) {
-		if (!fl.fl4_src)
-			fl.fl4_src = fl.fl4_dst;
+		if (!fl.fl4_src) {
+			if (res.fi->fib_prefsrc)
+				fl.fl4_src = res.fi->fib_prefsrc;
+			else
+				fl.fl4_src = fl.fl4_dst;
+		}
 		dev_out = net->loopback_dev;
 		fl.oif = dev_out->ifindex;
 		res.fi = NULL;
-- 
1.7.3.1


^ permalink raw reply related

* Re: [RFC PATCH 0/3] Simplified 16 bit Toeplitz hash algorithm
From: Tom Herbert @ 2011-01-04  3:25 UTC (permalink / raw)
  To: Ben Hutchings; +Cc: Alexander Duyck, David Miller, netdev@vger.kernel.org
In-Reply-To: <1294085724.3167.202.camel@localhost>

>> The general idea is to at least keep the traffic local to one TX/RX
>> queue pair so that if we cannot match the queue pair to the application,
>> perhaps the application can be affinitized to match up with the queue
>> pair.  Otherwise we end up with traffic getting routed to one TX queue
>> on one CPU, and the RX being routed to another queue on perhaps a
>> different CPU and it becomes quite difficult to match up the queues and
>> the applications.
>
> Right.  That certainly seems like a Good Thing, though I believe it can
> be implemented generically by recording the RX queue number on the
> socket:
>
> http://article.gmane.org/gmane.linux.network/158477
>
I still don't see the value in doing this RX/TX queue pairing (unless
you're considering the possibility of explicitly binding sockets to
queue pairs).  XPS should be a sufficient mechanism to get affinity on
the sending side.  Also, I don't know how the queue pairing model will be
maintained when using priority queues on transmit-- transmit is likely
to be asymmetric to the receive side.  The ability to seamlessly decouple
transmit queues and receive queues seems like a nice property.

>> Since the approach is based on Toeplitz it can be applied to all
>> hardware capable of generating a Toeplitz based hash and as a result it
>> would likely also work in a much more vendor neutral kind of way than
>> Flow Director currently does.
>
The device hash should already be available in sk_rxhash, so maybe
that could be used for this purpose.  I think it is a good property to
keep treating the device hashes as opaque values; any reasonable
32-bit 4-tuple hash should work equally well in the stack.

^ permalink raw reply

* linux-next: manual merge of the trivial tree with the net tree
From: Stephen Rothwell @ 2011-01-04  3:22 UTC (permalink / raw)
  To: Jiri Kosina
  Cc: linux-next, linux-kernel, Justin P. Mattock, Wey-Yi Guy,
	John W. Linville, David Miller, netdev

[-- Attachment #1: Type: text/plain, Size: 619 bytes --]

Hi Jiri,

Today's linux-next merge of the trivial tree got a conflict in
drivers/net/wireless/iwlwifi/iwl-core.c between commit
81baf6ec9c190ae128748cf2a026bff5cb811b70 ("iwlwifi: Legacy isr only used
by legacy devices") from the net tree and commit
62e45c14fb9a978dca6c7a5dc8372cc8ea2f42c8 ("wireless: comment typo fix
diable -> disable") from the trivial tree.

The former moves the code modified by the latter to
drivers/net/wireless/iwlwifi/iwl-legacy.c. I didn't bother refixing the
typo there.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* [net-next-2.6 PATCH v4 2/2] net_sched: implement a root container qdisc sch_mclass
From: John Fastabend @ 2011-01-04  3:05 UTC (permalink / raw)
  To: davem, jarkao2
  Cc: john.r.fastabend, hadi, shemminger, tgraf, eric.dumazet,
	bhutchings, nhorman, netdev
In-Reply-To: <20110104030553.13187.69135.stgit@jf-dev1-dcblab>

This implements a mqprio queueing discipline that by default creates
a pfifo_fast qdisc per tx queue and provides the needed configuration
interface.

Using the mqprio qdisc the number of tcs currently in use along
with the range of queues allotted to each class can be configured. By
default skbs are mapped to traffic classes using the skb priority.
This mapping is configurable.

Configurable parameters,

struct tc_mqprio_qopt {
        __u8    num_tc;
        __u8    prio_tc_map[16];
        __u8    hw;
        __u16   count[16];
        __u16   offset[16];
};

Here the count/offset pairing gives the queue alignment and the
prio_tc_map gives the mapping from skb->priority to tc.

The hw bit determines if the hardware should configure the count
and offset values. If the hardware bit is set then the operation
will fail if the hardware does not implement the ndo_setup_tc
operation. This is to avoid undetermined states where the hardware
may or may not control the queue mapping. Also minimal bounds
checking is done on the count/offset to verify a queue does not
exceed num_tx_queues and that queue ranges do not overlap. Otherwise
it is left to user policy or hardware configuration to create
useful mappings.

It is expected that hardware QOS schemes can be implemented by
creating appropriate mappings of queues in ndo_setup_tc().

One expected use case is that drivers will use ndo_setup_tc to map
queue ranges onto 802.1Q traffic classes. This provides a generic
mechanism to map network traffic onto these traffic classes and
removes the need for lower layer drivers to know specifics about
traffic types.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/netdevice.h |    3 
 include/linux/pkt_sched.h |    9 +
 net/sched/Kconfig         |   10 +
 net/sched/Makefile        |    1 
 net/sched/sch_generic.c   |    4 +
 net/sched/sch_mqprio.c    |  357 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 384 insertions(+), 0 deletions(-)
 create mode 100644 net/sched/sch_mqprio.c

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 073c48d..f90a863 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -764,6 +764,8 @@ struct netdev_tc_txq {
  * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
  *			  struct nlattr *port[]);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
+ *
+ * int (*ndo_setup_tc)(struct net_device *dev, int tc);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -822,6 +824,7 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 2cfa4bc..8e29fa6 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -481,4 +481,13 @@ struct tc_drr_stats {
 	__u32	deficit;
 };
 
+/* MCLASS */
+struct tc_mqprio_qopt {
+	__u8	num_tc;
+	__u8	prio_tc_map[16];
+	__u8	hw;
+	__u16	count[16];
+	__u16	offset[16];
+};
+
 #endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index a36270a..e42853b 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -205,6 +205,16 @@ config NET_SCH_DRR
 
 	  If unsure, say N.
 
+config NET_SCH_MQPRIO
+	tristate "Multi-queue priority scheduler (MQPRIO)"
+	help
+	  Say Y here if you want to use the Multi-queue Priority scheduler.
+
+	  To compile this driver as a module, choose M here: the module will
+	  be called sch_mqprio.
+
+	  If unsure, say N.
+
 config NET_SCH_INGRESS
 	tristate "Ingress Qdisc"
 	depends on NET_CLS_ACT
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 960f5db..26ce681 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -32,6 +32,7 @@ obj-$(CONFIG_NET_SCH_MULTIQ)	+= sch_multiq.o
 obj-$(CONFIG_NET_SCH_ATM)	+= sch_atm.o
 obj-$(CONFIG_NET_SCH_NETEM)	+= sch_netem.o
 obj-$(CONFIG_NET_SCH_DRR)	+= sch_drr.o
+obj-$(CONFIG_NET_SCH_MQPRIO)	+= sch_mqprio.o
 obj-$(CONFIG_NET_CLS_U32)	+= cls_u32.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 34dc598..723b278 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -540,6 +540,7 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 	.dump		=	pfifo_fast_dump,
 	.owner		=	THIS_MODULE,
 };
+EXPORT_SYMBOL(pfifo_fast_ops);
 
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  struct Qdisc_ops *ops)
@@ -674,6 +675,7 @@ struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
 
 	return oqdisc;
 }
+EXPORT_SYMBOL(dev_graft_qdisc);
 
 static void attach_one_default_qdisc(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
@@ -761,6 +763,7 @@ void dev_activate(struct net_device *dev)
 		dev_watchdog_up(dev);
 	}
 }
+EXPORT_SYMBOL(dev_activate);
 
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
@@ -840,6 +843,7 @@ void dev_deactivate(struct net_device *dev)
 	list_add(&dev->unreg_list, &single);
 	dev_deactivate_many(&single);
 }
+EXPORT_SYMBOL(dev_deactivate);
 
 static void dev_init_scheduler_queue(struct net_device *dev,
 				     struct netdev_queue *dev_queue,
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
new file mode 100644
index 0000000..e9e74c7
--- /dev/null
+++ b/net/sched/sch_mqprio.c
@@ -0,0 +1,357 @@
+/*
+ * net/sched/sch_mqprio.c
+ *
+ * Copyright (c) 2010 John Fastabend <john.r.fastabend@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+#include <net/sch_generic.h>
+
+struct mqprio_sched {
+	struct Qdisc		**qdiscs;
+	int hw_owned;
+};
+
+static void mqprio_destroy(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned int ntx;
+
+	if (!priv->qdiscs)
+		return;
+
+	for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++)
+		qdisc_destroy(priv->qdiscs[ntx]);
+
+	if (priv->hw_owned && dev->netdev_ops->ndo_setup_tc)
+		dev->netdev_ops->ndo_setup_tc(dev, 0);
+	else
+		netdev_set_num_tc(dev, 0);
+
+	kfree(priv->qdiscs);
+}
+
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+	int i, j;
+
+	/* Verify num_tc is not out of max range */
+	if (qopt->num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	for (i = 0; i < qopt->num_tc; i++) {
+		unsigned int last = qopt->offset[i] + qopt->count[i];
+		/* Verify the queue offset is in the num tx range */
+		if (qopt->offset[i] >= dev->num_tx_queues)
+			return -EINVAL;
+		/* Verify the queue count is in tx range being equal to the
+		 * num_tx_queues indicates the last queue is in use.
+		 */
+		else if (last > dev->num_tx_queues)
+			return -EINVAL;
+
+		/* Verify that the offset and counts do not overlap */
+		for (j = i + 1; j < qopt->num_tc; j++) {
+			if (last > qopt->offset[j])
+				return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int mqprio_init(struct Qdisc *sch, struct nlattr *opt)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct netdev_queue *dev_queue;
+	struct Qdisc *qdisc;
+	int i, err = -EOPNOTSUPP;
+	struct tc_mqprio_qopt *qopt = NULL;
+
+	/* Unwind attributes on failure */
+	u8 unwnd_tc = dev->num_tc;
+	u8 unwnd_map[TC_BITMASK+1];
+	struct netdev_tc_txq unwnd_txq[TC_MAX_QUEUE];
+
+	if (sch->parent != TC_H_ROOT)
+		return -EOPNOTSUPP;
+
+	if (!netif_is_multiqueue(dev))
+		return -EOPNOTSUPP;
+
+	if (nla_len(opt) < sizeof(*qopt))
+		return -EINVAL;
+	qopt = nla_data(opt);
+
+	memcpy(unwnd_map, dev->prio_tc_map, sizeof(unwnd_map));
+	memcpy(unwnd_txq, dev->tc_to_txq, sizeof(unwnd_txq));
+
+	/* If the mqprio options indicate that hardware should own
+	 * the queue mapping then run ndo_setup_tc if this can not
+	 * be done fail immediately.
+	 */
+	if (qopt->hw && dev->netdev_ops->ndo_setup_tc) {
+		priv->hw_owned = 1;
+		err = dev->netdev_ops->ndo_setup_tc(dev, qopt->num_tc);
+		if (err)
+			return err;
+	} else if (!qopt->hw) {
+		if (mqprio_parse_opt(dev, qopt))
+			return -EINVAL;
+
+		if (netdev_set_num_tc(dev, qopt->num_tc))
+			return -EINVAL;
+
+		for (i = 0; i < qopt->num_tc; i++)
+			netdev_set_tc_queue(dev, i,
+					    qopt->count[i], qopt->offset[i]);
+	} else {
+		return -EINVAL;
+	}
+
+	/* Always use supplied priority mappings */
+	for (i = 0; i < TC_BITMASK+1; i++) {
+		if (netdev_set_prio_tc_map(dev, i, qopt->prio_tc_map[i])) {
+			err = -EINVAL;
+			goto tc_err;
+		}
+	}
+
+	/* pre-allocate qdisc, attachment can't fail */
+	priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]),
+			       GFP_KERNEL);
+	if (priv->qdiscs == NULL) {
+		err = -ENOMEM;
+		goto tc_err;
+	}
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		dev_queue = netdev_get_tx_queue(dev, i);
+		qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops,
+					  TC_H_MAKE(TC_H_MAJ(sch->handle),
+						    TC_H_MIN(i + 1)));
+		if (qdisc == NULL) {
+			err = -ENOMEM;
+			goto err;
+		}
+		qdisc->flags |= TCQ_F_CAN_BYPASS;
+		priv->qdiscs[i] = qdisc;
+	}
+
+	sch->flags |= TCQ_F_MQROOT;
+	return 0;
+
+err:
+	mqprio_destroy(sch);
+tc_err:
+	if (priv->hw_owned)
+		dev->netdev_ops->ndo_setup_tc(dev, unwnd_tc);
+	else
+		netdev_set_num_tc(dev, unwnd_tc);
+
+	memcpy(dev->prio_tc_map, unwnd_map, sizeof(unwnd_map));
+	memcpy(dev->tc_to_txq, unwnd_txq, sizeof(unwnd_txq));
+
+	return err;
+}
+
+static void mqprio_attach(struct Qdisc *sch)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	struct Qdisc *qdisc;
+	unsigned int ntx;
+
+	/* Attach underlying qdisc */
+	for (ntx = 0; ntx < dev->num_tx_queues; ntx++) {
+		qdisc = priv->qdiscs[ntx];
+		qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc);
+		if (qdisc)
+			qdisc_destroy(qdisc);
+	}
+	kfree(priv->qdiscs);
+	priv->qdiscs = NULL;
+}
+
+static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch,
+					     unsigned long cl)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx = cl - 1;
+
+	if (ntx >= dev->num_tx_queues)
+		return NULL;
+	return netdev_get_tx_queue(dev, ntx);
+}
+
+static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new,
+		    struct Qdisc **old)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	if (dev->flags & IFF_UP)
+		dev_deactivate(dev);
+
+	*old = dev_graft_qdisc(dev_queue, new);
+
+	if (dev->flags & IFF_UP)
+		dev_activate(dev);
+
+	return 0;
+}
+
+static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	struct mqprio_sched *priv = qdisc_priv(sch);
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tc_mqprio_qopt opt;
+	struct Qdisc *qdisc;
+	unsigned int i;
+
+	sch->q.qlen = 0;
+	memset(&sch->bstats, 0, sizeof(sch->bstats));
+	memset(&sch->qstats, 0, sizeof(sch->qstats));
+
+	for (i = 0; i < dev->num_tx_queues; i++) {
+		qdisc = netdev_get_tx_queue(dev, i)->qdisc;
+		spin_lock_bh(qdisc_lock(qdisc));
+		sch->q.qlen		+= qdisc->q.qlen;
+		sch->bstats.bytes	+= qdisc->bstats.bytes;
+		sch->bstats.packets	+= qdisc->bstats.packets;
+		sch->qstats.qlen	+= qdisc->qstats.qlen;
+		sch->qstats.backlog	+= qdisc->qstats.backlog;
+		sch->qstats.drops	+= qdisc->qstats.drops;
+		sch->qstats.requeues	+= qdisc->qstats.requeues;
+		sch->qstats.overlimits	+= qdisc->qstats.overlimits;
+		spin_unlock_bh(qdisc_lock(qdisc));
+	}
+
+	opt.num_tc = dev->num_tc;
+	memcpy(opt.prio_tc_map, dev->prio_tc_map, 16);
+	opt.hw = priv->hw_owned;
+
+	for (i = 0; i < dev->num_tc; i++) {
+		opt.count[i] = dev->tc_to_txq[i].count;
+		opt.offset[i] = dev->tc_to_txq[i].offset;
+	}
+
+	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl)
+{
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	return dev_queue->qdisc_sleeping;
+}
+
+static unsigned long mqprio_get(struct Qdisc *sch, u32 classid)
+{
+	unsigned int ntx = TC_H_MIN(classid);
+
+	if (!mqprio_queue_get(sch, ntx))
+		return 0;
+	return ntx;
+}
+
+static void mqprio_put(struct Qdisc *sch, unsigned long cl)
+{
+}
+
+static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl,
+			 struct sk_buff *skb, struct tcmsg *tcm)
+{
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	tcm->tcm_parent = TC_H_ROOT;
+	tcm->tcm_handle |= TC_H_MIN(cl);
+	tcm->tcm_info = dev_queue->qdisc_sleeping->handle;
+	return 0;
+}
+
+static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+			       struct gnet_dump *d)
+{
+	struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl);
+
+	sch = dev_queue->qdisc_sleeping;
+	sch->qstats.qlen = sch->q.qlen;
+	if (gnet_stats_copy_basic(d, &sch->bstats) < 0 ||
+	    gnet_stats_copy_queue(d, &sch->qstats) < 0)
+		return -1;
+	return 0;
+}
+
+static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+	struct net_device *dev = qdisc_dev(sch);
+	unsigned long ntx;
+
+	if (arg->stop)
+		return;
+
+	arg->count = arg->skip;
+	for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) {
+		if (arg->fn(sch, ntx + 1, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+		arg->count++;
+	}
+}
+
+static const struct Qdisc_class_ops mqprio_class_ops = {
+	.graft		= mqprio_graft,
+	.leaf		= mqprio_leaf,
+	.get		= mqprio_get,
+	.put		= mqprio_put,
+	.walk		= mqprio_walk,
+	.dump		= mqprio_dump_class,
+	.dump_stats	= mqprio_dump_class_stats,
+};
+
+struct Qdisc_ops mqprio_qdisc_ops __read_mostly = {
+	.cl_ops		= &mqprio_class_ops,
+	.id		= "mqprio",
+	.priv_size	= sizeof(struct mqprio_sched),
+	.init		= mqprio_init,
+	.destroy	= mqprio_destroy,
+	.attach		= mqprio_attach,
+	.dump		= mqprio_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init mqprio_module_init(void)
+{
+	return register_qdisc(&mqprio_qdisc_ops);
+}
+
+static void __exit mqprio_module_exit(void)
+{
+	unregister_qdisc(&mqprio_qdisc_ops);
+}
+
+module_init(mqprio_module_init);
+module_exit(mqprio_module_exit);
+
+MODULE_LICENSE("GPL");


^ permalink raw reply related

* [net-next-2.6 PATCH v4 1/2] net: implement mechanism for HW based QOS
From: John Fastabend @ 2011-01-04  3:05 UTC (permalink / raw)
  To: davem, jarkao2
  Cc: john.r.fastabend, hadi, shemminger, tgraf, eric.dumazet,
	bhutchings, nhorman, netdev

This patch provides a mechanism for lower layer devices to
steer traffic using skb->priority to tx queues. This allows
hardware based QOS schemes to use the default qdisc without
incurring the penalties related to global state and the qdisc
lock, while still reliably receiving skbs on the correct tx ring
to avoid head of line blocking resulting from shuffling in
the LLD. Finally, all the goodness from txq caching and xps/rps
can still be leveraged.

Many drivers and hardware exist with the ability to implement
QOS schemes in the hardware but currently these drivers tend
to rely on firmware to reroute specific traffic, a driver
specific select_queue or the queue_mapping action in the
qdisc.

By using select_queue for this, drivers need to be updated for
each and every traffic type and we lose the goodness of much
of the upstream work. Firmware solutions are inherently
inflexible. And finally if admins are expected to build a
qdisc and filter rules to steer traffic this requires knowledge
of how the hardware is currently configured. The number of tx
queues and the queue offsets may change depending on resources.
Also this approach incurs all the overhead of a qdisc with filters.

With the mechanism in this patch, users can set skb priority using
expected methods, i.e. setsockopt(), or the stack can set the priority
directly. Then the skb will be steered to the correct tx queues
aligned with hardware QOS traffic classes. In the normal case with
a single traffic class and all queues in this class everything
works as is until the LLD enables multiple tcs.

To steer the skb we mask out the lower 4 bits of the priority
and allow the hardware to configure up to 15 distinct classes
of traffic. This is expected to be sufficient for most applications;
at any rate it is more than the 8021Q spec designates and is
equal to the number of prio bands currently implemented in
the default qdisc.

This, in conjunction with a userspace application such as
lldpad, can be used to implement 8021Q transmission selection
algorithms, one of these algorithms being the extended transmission
selection algorithm currently being used for DCB.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---

 include/linux/netdevice.h |   62 +++++++++++++++++++++++++++++++++++++++++++++
 net/core/dev.c            |   10 +++++++
 2 files changed, 71 insertions(+), 1 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc916c5..073c48d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -646,6 +646,14 @@ struct xps_dev_maps {
     (nr_cpu_ids * sizeof(struct xps_map *)))
 #endif /* CONFIG_XPS */
 
+#define TC_MAX_QUEUE	16
+#define TC_BITMASK	15
+/* HW offloaded queuing disciplines txq count and offset maps */
+struct netdev_tc_txq {
+	u16 count;
+	u16 offset;
+};
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -1146,6 +1154,9 @@ struct net_device {
 	/* Data Center Bridging netlink ops */
 	const struct dcbnl_rtnl_ops *dcbnl_ops;
 #endif
+	u8 num_tc;
+	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
+	u8 prio_tc_map[TC_BITMASK+1];
 
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	/* max exchange id for FCoE LRO by ddp */
@@ -1162,6 +1173,57 @@ struct net_device {
 #define	NETDEV_ALIGN		32
 
 static inline
+int netdev_get_prio_tc_map(const struct net_device *dev, u32 prio)
+{
+	return dev->prio_tc_map[prio & TC_BITMASK];
+}
+
+static inline
+int netdev_set_prio_tc_map(struct net_device *dev, u8 prio, u8 tc)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->prio_tc_map[prio & TC_BITMASK] = tc & TC_BITMASK;
+	return 0;
+}
+
+static inline
+void netdev_reset_tc(struct net_device *dev)
+{
+	dev->num_tc = 0;
+	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+
+static inline
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+	dev->tc_to_txq[tc].count = count;
+	dev->tc_to_txq[tc].offset = offset;
+	return 0;
+}
+
+static inline
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+	if (num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+	dev->num_tc = num_tc;
+	return 0;
+}
+
+static inline
+u8 netdev_get_num_tc(const struct net_device *dev)
+{
+	return dev->num_tc;
+}
+
+static inline
 struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev,
 					 unsigned int index)
 {
diff --git a/net/core/dev.c b/net/core/dev.c
index a215269..38318e2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2165,6 +2165,8 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		  unsigned int num_tx_queues)
 {
 	u32 hash;
+	u16 qoffset = 0;
+	u16 qcount = num_tx_queues;
 
 	if (skb_rx_queue_recorded(skb)) {
 		hash = skb_get_rx_queue(skb);
@@ -2173,13 +2175,19 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
 		return hash;
 	}
 
+	if (dev->num_tc) {
+		u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+		qoffset = dev->tc_to_txq[tc].offset;
+		qcount = dev->tc_to_txq[tc].count;
+	}
+
 	if (skb->sk && skb->sk->sk_hash)
 		hash = skb->sk->sk_hash;
 	else
 		hash = (__force u16) skb->protocol ^ skb->rxhash;
 	hash = jhash_1word(hash, hashrnd);
 
-	return (u16) (((u64) hash * num_tx_queues) >> 32);
+	return (u16) (((u64) hash * qcount) >> 32) + qoffset;
 }
 EXPORT_SYMBOL(__skb_tx_hash);
 


^ permalink raw reply related

* Re: [PATCH net-next 2/2] cnic: Do not call bnx2i when bnx2i is calling cnic_unregister_driver()
From: David Miller @ 2011-01-04  3:08 UTC (permalink / raw)
  To: mchan; +Cc: netdev
In-Reply-To: <1294104106-9758-2-git-send-email-mchan@broadcom.com>

From: "Michael Chan" <mchan@broadcom.com>
Date: Mon, 3 Jan 2011 17:21:46 -0800

> We should call bnx2i to send the iSCSI netlink message earlier in
> cnic_unregister_device().  By the time cnic_unregister_driver() is
> called, bnx2i may have freed data structures used by the upcalls.
> 
> Update version to 2.2.12.
> 
> Reviewed-by: Benjamin Li <benli@broadcom.com>
> Signed-off-by: Michael Chan <mchan@broadcom.com>

Applied.

^ permalink raw reply

* Re: [PATCH net-next 1/2] cnic: Do not allow iSCSI and FCoE on bnx2x multi-function mode
From: David Miller @ 2011-01-04  3:08 UTC (permalink / raw)
  To: mchan; +Cc: netdev
In-Reply-To: <1294104106-9758-1-git-send-email-mchan@broadcom.com>

From: "Michael Chan" <mchan@broadcom.com>
Date: Mon, 3 Jan 2011 17:21:45 -0800

> Because the hardware does not yet support these in this mode.
> 
> Reviewed-by: Benjamin Li <benli@broadcom.com>
> Signed-off-by: Michael Chan <mchan@broadcom.com>

Applied.

^ permalink raw reply

* Re: [net-next-2.6 PATCH v2 3/3] net_sched: implement a root container qdisc sch_mclass
From: John Fastabend @ 2011-01-04  2:59 UTC (permalink / raw)
  To: Jarek Poplawski
  Cc: davem@davemloft.net, netdev@vger.kernel.org, hadi@cyberus.ca,
	shemminger@vyatta.com, tgraf@infradead.org,
	eric.dumazet@gmail.com, bhutchings@solarflare.com,
	nhorman@tuxdriver.com
In-Reply-To: <4D22674B.6010304@intel.com>

On 1/3/2011 4:18 PM, John Fastabend wrote:
> On 1/3/2011 2:59 PM, Jarek Poplawski wrote:
>> On Mon, Jan 03, 2011 at 12:37:56PM -0800, John Fastabend wrote:
>>> On 1/3/2011 9:02 AM, Jarek Poplawski wrote:
>>>> On Sun, Jan 02, 2011 at 09:43:27PM -0800, John Fastabend wrote:
>>>>> On 12/30/2010 3:37 PM, Jarek Poplawski wrote:
>>>>>> John Fastabend wrote:
>>>>>>> This implements a mclass 'multi-class' queueing discipline that by
>>>>>>> default creates multiple mq qdisc's one for each traffic class. Each
>>>>>>> mq qdisc then owns a range of queues per the netdev_tc_txq mappings.
>>>>>>
>>>>>> Is it really necessary to add one more abstraction layer for this,
>>>>>> probably not most often used (or even asked by users), functionality?
>>>>>> Why mclass can't simply do these few things more instead of attaching
>>>>>> (and changing) mq?
>>>>>>
>>>>>
>>>>> The statistics work nicely when the mq qdisc is used. 
>>>>
>>>> Well, I sometimes add leaf qdiscs only to get class stats with less
>>>> typing, too ;-)
>>>>
>>>>>
>>>>> qdisc mclass 8002: root  tc 4 map 0 1 2 3 0 1 2 3 1 1 1 1 1 1 1 1
>>>>>              queues:(0:1) (2:3) (4:5) (6:15)
>>>>>  Sent 140 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc mq 8003: parent 8002:1
>>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc mq 8004: parent 8002:2
>>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc mq 8005: parent 8002:3
>>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc mq 8006: parent 8002:4
>>>>>  Sent 140 bytes 2 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc sfq 8007: parent 8005:1 limit 127p quantum 1514b
>>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>> qdisc sfq 8008: parent 8005:2 limit 127p quantum 1514b
>>>>>  Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0)
>>>>>  backlog 0b 0p requeues 0
>>>>>
>>>>> The mclass gives the statistics for the interface and then statistics on the mq qdisc gives statistics for each traffic class. Also, when using the 'mq qdisc' with this abstraction other qdisc can be grafted onto the queue. For example the sch_sfq is used in the above example.
>>>>
>>>> IMHO, these tc offsets and counts make simply two level hierarchy
>>>> (classes with leaf subclasses) similarly (or simpler) to other
>>>> classful qdisc which manage it all inside one module. Of course,
>>>> we could think of another way of code organization, but it should
>>>> be rather done at the beginning of schedulers design. The mq qdisc
>>>> broke the design a bit adding a fake root, but I doubt we should go
>>>> deeper unless it's necessary. Doing mclass (or something) as a more
>>>> complex alternative to mq should be enough. Why couldn't mclass graft
>>>> sch_sfq the same way as mq?
>>>>
>>>
>>> If you also want to graft a scheduler onto a traffic class now your stuck. For now this qdisc doesn't exist, but I would like to have a software implementation of the currently offloaded DCB ETS scheduler. The 802.1Qaz spec allows different scheduling algorithms to be used on each traffic class. In the current implementation mclass could graft these scheduling schemes onto each traffic class independently.
>>>
>>>                               mclass
>>>                                 |
>>>     -------------------------------------------------------
>>>     |         |        |        |     |     |     |       |
>>>    mq_tbf   mq_tbf   mq_ets   mq_ets  mq    mq   mq_wrr greedy
>>>    |                            |
>>>  ---------                  ---------
>>>  |   |   |                  |   |   |
>>> red red red                red red red
>>>
>>> Perhaps, being concerned with hypothetical qdiscs is a bit of a stretch but I would like to at least not preclude this work from being done in the future.
>>
>> Probably, despite this very nice figure and description, I still miss
>> something and can't see the problem. If you graft a qdisc/scheduler
>> to a traffic class you can change the way/range of grafting depending
>> on additional parameters or even by checking some properties of the
>> grafted qdisc. My main concern is adding complexity to the qdisc tree
>> structure (instead of hiding it at the class level) for something,
>> IMHO, hardly ever popular (like both mq and DCB).
>>
> 
> OK I'm convinced I'll keep everything contained in mclass. Building this mechanism into the qdisc seems to be adding extra complexity that is most likely not needed as you noted.
> 
> Although I suspect the "additional parameter" would be something along the lines of a queue index and offset? right? Otherwise how would a mq like qdisc know which queues it owns.


Perhaps something with qdisc_class_ops select_queue() could be done to make it more flexible. When I get around to implementing these hypothetical qdiscs I will have to figure it out. For now though hypothetical qdiscs are not a very compelling use case.

Thanks,
John.

^ permalink raw reply

* RE: [net-next-2.6 08/08] r8169: more 8168dp support.
From: hayeswang @ 2011-01-04  2:43 UTC (permalink / raw)
  To: 'Francois Romieu'; +Cc: davem, netdev, 'Ben Hutchings'
In-Reply-To: <20110104002606.GA5934@electric-eye.fr.zoreil.com>

> From: Francois Romieu [mailto:romieu@fr.zoreil.com] 
> Sent: Tuesday, January 04, 2011 8:26 AM
> To: Hayeswang
> Cc: davem@davemloft.net; netdev@vger.kernel.org; 'Ben Hutchings'
> Subject: Re: [net-next-2.6 08/08] r8169: more 8168dp support.
> 
> hayeswang <hayeswang@realtek.com> :
> [...]
> > > +static void rtl8169_hw_reset(struct rtl8169_private *tp)
> > >  {
> > > +	void __iomem *ioaddr = tp->mmio_addr;
> > > +
> > >  	/* Disable interrupts */
> > >  	rtl8169_irq_mask_and_ack(ioaddr);
> > >  
> > > +	if (tp->mac_version == RTL_GIGA_MAC_VER_28) {
> > 
> > This check should include RTL_GIGA_MAC_VER_27.
> 
> Sure. I have a different (yet untested) patch for it. See below.
> 
> > > +		while (RTL_R8(TxPoll) & NPQ)
> > > +			udelay(20);
> > > +
> > > +	}
> > > +
> > >  	/* Reset the chipset */
> > >  	RTL_W8(ChipCmd, CmdReset);
> > >  
> > 
> > After the reset, there are something to do for RTL_GIGA_MAC_VER_27. 
> > You may check the soure code of realtek. Find "rtl8168_nic_reset".
> 
> Ok. Any comment about the patch below ? I wish it was more 
> expressive, especially the "mutex" magic.
> 

The 8111DP chip has an embedded system inside. Thus, sometimes the nic has to
tell the embedded system what the nic is doing now. For this reason, you can
find the function "OOB_notify" in the source code of realtek. Furthermore, we use a
software mutex to prevent the driver and the embedded system from accessing the
same register at the same time. When doing a reset, the nic has to notify the
embedded system and wait for a response. However, the embedded system may access
the same register at the same time, so the embedded system and the driver
implement the same software mutex method to prevent this situation.

 
Best Regards,
Hayes


^ permalink raw reply

* linux-next: manual merge of the net tree with the pci tree
From: Stephen Rothwell @ 2011-01-04  2:28 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Jon Mason, Jesse Barnes,
	stephen hemminger

[-- Attachment #1: Type: text/plain, Size: 517 bytes --]

Hi all,

Today's linux-next merge of the net tree got a conflict in
drivers/net/skge.c between commit
1d3c16a818e992c199844954d95c17fd7ce6cbba ("PCI: make pci_restore_state
return void") from the pci tree and commit
7dbf6acdbad2fbc6eea72b58404461dcb7c6d9d2 ("skge: Do not use legacy PCI
power management") from the net tree.

The latter removes the code modified by the former, so I used the latter.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* linux-next: manual merge of the net tree with the pci tree
From: Stephen Rothwell @ 2011-01-04  2:28 UTC (permalink / raw)
  To: David Miller, netdev
  Cc: linux-next, linux-kernel, Jon Mason, Jesse Barnes,
	Rafael J. Wysocki

[-- Attachment #1: Type: text/plain, Size: 514 bytes --]

Hi all,

Today's linux-next merge of the net tree got a conflict in
drivers/net/sky2.c between commit
1d3c16a818e992c199844954d95c17fd7ce6cbba ("PCI: make pci_restore_state
return void") from the pci tree and commit
0f333d10e3f689640b229c8cf00b16ea51ce4951 ("sky2: Do not use legacy PCI
power management") from the net tree.

The latter removes the code that the former modifies.  I used the latter.
-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

[-- Attachment #2: Type: application/pgp-signature, Size: 490 bytes --]

^ permalink raw reply

* [PATCH net-next 2/2] cnic: Do not call bnx2i when bnx2i is calling cnic_unregister_driver()
From: Michael Chan @ 2011-01-04  1:21 UTC (permalink / raw)
  To: davem; +Cc: netdev
In-Reply-To: <1294104106-9758-1-git-send-email-mchan@broadcom.com>

We should call bnx2i to send the iSCSI netlink message earlier in
cnic_unregister_device().  By the time cnic_unregister_driver() is
called, bnx2i may have freed data structures used by the upcalls.

Update version to 2.2.12.

Reviewed-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/cnic.c    |   19 +++----------------
 drivers/net/cnic_if.h |    6 +++---
 2 files changed, 6 insertions(+), 19 deletions(-)

diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 41957fa..6dfa564 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -426,19 +426,6 @@ static int cnic_abort_prep(struct cnic_sock *csk)
 	return 0;
 }
 
-static void cnic_uio_stop(void)
-{
-	struct cnic_dev *dev;
-
-	read_lock(&cnic_dev_lock);
-	list_for_each_entry(dev, &cnic_dev_list, list) {
-		struct cnic_local *cp = dev->cnic_priv;
-
-		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
-	}
-	read_unlock(&cnic_dev_lock);
-}
-
 int cnic_register_driver(int ulp_type, struct cnic_ulp_ops *ulp_ops)
 {
 	struct cnic_dev *dev;
@@ -510,9 +497,6 @@ int cnic_unregister_driver(int ulp_type)
 	}
 	read_unlock(&cnic_dev_lock);
 
-	if (ulp_type == CNIC_ULP_ISCSI)
-		cnic_uio_stop();
-
 	rcu_assign_pointer(cnic_ulp_tbl[ulp_type], NULL);
 
 	mutex_unlock(&cnic_lock);
@@ -596,6 +580,9 @@ static int cnic_unregister_device(struct cnic_dev *dev, int ulp_type)
 	}
 	mutex_unlock(&cnic_lock);
 
+	if (ulp_type == CNIC_ULP_ISCSI)
+		cnic_send_nlmsg(cp, ISCSI_KEVENT_IF_DOWN, NULL);
+
 	synchronize_rcu();
 
 	while (test_bit(ULP_F_CALL_PENDING, &cp->ulp_flags[ulp_type]) &&
diff --git a/drivers/net/cnic_if.h b/drivers/net/cnic_if.h
index ccd8140..9f44e0f 100644
--- a/drivers/net/cnic_if.h
+++ b/drivers/net/cnic_if.h
@@ -1,6 +1,6 @@
 /* cnic_if.h: Broadcom CNIC core network driver.
  *
- * Copyright (c) 2006-2010 Broadcom Corporation
+ * Copyright (c) 2006-2011 Broadcom Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -12,8 +12,8 @@
 #ifndef CNIC_IF_H
 #define CNIC_IF_H
 
-#define CNIC_MODULE_VERSION	"2.2.11"
-#define CNIC_MODULE_RELDATE	"Dec 22, 2010"
+#define CNIC_MODULE_VERSION	"2.2.12"
+#define CNIC_MODULE_RELDATE	"Jan 03, 2011"
 
 #define CNIC_ULP_RDMA		0
 #define CNIC_ULP_ISCSI		1
-- 
1.6.4.GIT



^ permalink raw reply related

* [PATCH net-next 1/2] cnic: Do not allow iSCSI and FCoE on bnx2x multi-function mode
From: Michael Chan @ 2011-01-04  1:21 UTC (permalink / raw)
  To: davem; +Cc: netdev

Because the hardware does not yet support these in this mode.

Reviewed-by: Benjamin Li <benli@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
---
 drivers/net/cnic.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/cnic.c b/drivers/net/cnic.c
index 4a9c628..41957fa 100644
--- a/drivers/net/cnic.c
+++ b/drivers/net/cnic.c
@@ -4826,12 +4826,8 @@ static void cnic_get_bnx2x_iscsi_info(struct cnic_dev *dev)
 		val = CNIC_RD(dev, addr);
 		val &= FUNC_MF_CFG_E1HOV_TAG_MASK;
 		if (val != FUNC_MF_CFG_E1HOV_TAG_DEFAULT) {
-			addr = BNX2X_MF_CFG_ADDR(mf_cfg_addr,
-				func_mf_config[func].config);
-			val = CNIC_RD(dev, addr);
-			val &= FUNC_MF_CFG_PROTOCOL_MASK;
-			if (val != FUNC_MF_CFG_PROTOCOL_ISCSI)
-				dev->max_iscsi_conn = 0;
+			dev->max_fcoe_conn = 0;
+			dev->max_iscsi_conn = 0;
 		}
 	}
 	if (!is_valid_ether_addr(dev->mac_addr))
-- 
1.6.4.GIT



^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox