Netdev List
 help / color / mirror / Atom feed
* Re: [PATCH net-next v3 4/4] ipv6: use DST_* macro to set obselete field
From: Eric Dumazet @ 2012-09-12  7:40 UTC (permalink / raw)
  To: Nicolas Dichtel
  Cc: vyasevich, davem, sds, james.l.morris, eparis, sri, linux-sctp,
	netdev
In-Reply-To: <1347350987-8054-5-git-send-email-nicolas.dichtel@6wind.com>

On Tue, 2012-09-11 at 10:09 +0200, Nicolas Dichtel wrote:
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> ---
>  net/ipv6/route.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
> 
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 561f249..0c6f132 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -226,7 +226,7 @@ static struct rt6_info ip6_null_entry_template = {
>  	.dst = {
>  		.__refcnt	= ATOMIC_INIT(1),
>  		.__use		= 1,
> -		.obsolete	= -1,
> +		.obsolete	= DST_OBSOLETE_FORCE_CHK,
>  		.error		= -ENETUNREACH,
>  		.input		= ip6_pkt_discard,
>  		.output		= ip6_pkt_discard_out,
> @@ -246,7 +246,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
>  	.dst = {
>  		.__refcnt	= ATOMIC_INIT(1),
>  		.__use		= 1,
> -		.obsolete	= -1,
> +		.obsolete	= DST_OBSOLETE_FORCE_CHK,
>  		.error		= -EACCES,
>  		.input		= ip6_pkt_prohibit,
>  		.output		= ip6_pkt_prohibit_out,
> @@ -261,7 +261,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
>  	.dst = {
>  		.__refcnt	= ATOMIC_INIT(1),
>  		.__use		= 1,
> -		.obsolete	= -1,
> +		.obsolete	= DST_OBSOLETE_FORCE_CHK,
>  		.error		= -EINVAL,
>  		.input		= dst_discard,
>  		.output		= dst_discard,

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [PATCH] ipv6: replace write lock with read lock when get route info
From: Eric Dumazet @ 2012-09-12  7:45 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev
In-Reply-To: <1347427553-17781-1-git-send-email-roy.qing.li@gmail.com>

On Wed, 2012-09-12 at 13:25 +0800, roy.qing.li@gmail.com wrote:
> From: Li RongQing <roy.qing.li@gmail.com>
> 
> geting route info does not write rt->rt6i_table, so replace
> write lock with read lock
> 
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---
>  net/ipv6/route.c |    8 ++++----
>  1 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/net/ipv6/route.c b/net/ipv6/route.c
> index 399613b..8be1d86 100644
> --- a/net/ipv6/route.c
> +++ b/net/ipv6/route.c
> @@ -1837,7 +1837,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
>  	if (!table)
>  		return NULL;
>  
> -	write_lock_bh(&table->tb6_lock);
> +	read_lock_bh(&table->tb6_lock);
>  	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
>  	if (!fn)
>  		goto out;
> @@ -1853,7 +1853,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
>  		break;
>  	}
>  out:
> -	write_unlock_bh(&table->tb6_lock);
> +	read_unlock_bh(&table->tb6_lock);
>  	return rt;
>  }
>  
> @@ -1896,7 +1896,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
>  	if (!table)
>  		return NULL;
>  
> -	write_lock_bh(&table->tb6_lock);
> +	read_lock_bh(&table->tb6_lock);
>  	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
>  		if (dev == rt->dst.dev &&
>  		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
> @@ -1905,7 +1905,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
>  	}
>  	if (rt)
>  		dst_hold(&rt->dst);
> -	write_unlock_bh(&table->tb6_lock);
> +	read_unlock_bh(&table->tb6_lock);
>  	return rt;
>  }
>  

Why don't you also change addrconf_get_prefix_route()?

^ permalink raw reply

* [PATCH] netfilter/iptables: Fix log-level processing
From: Joe Perches @ 2012-09-12  7:46 UTC (permalink / raw)
  To: auto75914331, Bart De Schuymer, Pablo Neira Ayuso,
	Patrick McHardy, Stephen Hemminger
  Cc: netfilter-devel, netfilter, coreteam, bridge, netdev,
	linux-kernel
In-Reply-To: <20120912045120.9E1A76F446@smtp.hushmail.com>

auto75914331@hushmail.com reports that iptables does not correctly
output the KERN_<level>.

$IPTABLES -A RULE_0_in  -j LOG  --log-level notice --log-prefix "DENY  in: "

result with linux 3.6-rc5
Sep 12 06:37:29 xxxxx kernel: <5>DENY  in: IN=eth0 OUT= MAC=.......

result with linux 3.5.3 and older:
Sep  9 10:43:01 xxxxx kernel: DENY  in: IN=eth0 OUT= MAC......

commit 04d2c8c83d0
("printk: convert the format for KERN_<LEVEL> to a 2 byte pattern")
updated the syslog header style but did not update netfilter uses.

Do so.

Signed-off-by: Joe Perches <joe@perches.com>
cc: auto75914331@hushmail.com
---
 net/bridge/netfilter/ebt_log.c |    4 ++--
 net/netfilter/xt_LOG.c         |    4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index f88ee53..cb46d2f 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -80,8 +80,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 	unsigned int bitmask;
 
 	spin_lock_bh(&ebt_log_lock);
-	printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
-	       '0' + loginfo->u.log.level, prefix,
+	printk("%c%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
+	       KERN_SOH_ASCII, '0' + loginfo->u.log.level, prefix,
 	       in ? in->name : "", out ? out->name : "",
 	       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
 	       ntohs(eth_hdr(skb)->h_proto));
diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
index ff5f75f..bdc5352 100644
--- a/net/netfilter/xt_LOG.c
+++ b/net/netfilter/xt_LOG.c
@@ -436,8 +436,8 @@ log_packet_common(struct sbuff *m,
 		  const struct nf_loginfo *loginfo,
 		  const char *prefix)
 {
-	sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
-	       prefix,
+	sb_add(m, "%c%c%sIN=%s OUT=%s ",
+	       KERN_SOH_ASCII, '0' + loginfo->u.log.level, prefix,
 	       in ? in->name : "",
 	       out ? out->name : "");
 #ifdef CONFIG_BRIDGE_NETFILTER



^ permalink raw reply related

* [PATCH net-next] ipv6: route templates can be const
From: Eric Dumazet @ 2012-09-12  7:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

From: Eric Dumazet <edumazet@google.com>

We kmemdup() templates, so they can be const.

Signed-off-by: Eric Dumazet <edumazet@google.com>
---
 net/ipv6/route.c |    6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 399613b..f568ac6 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -222,7 +222,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = {
 	[RTAX_HOPLIMIT - 1] = 255,
 };
 
-static struct rt6_info ip6_null_entry_template = {
+static const struct rt6_info ip6_null_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
 		.__use		= 1,
@@ -242,7 +242,7 @@ static struct rt6_info ip6_null_entry_template = {
 static int ip6_pkt_prohibit(struct sk_buff *skb);
 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
-static struct rt6_info ip6_prohibit_entry_template = {
+static const struct rt6_info ip6_prohibit_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
 		.__use		= 1,
@@ -257,7 +257,7 @@ static struct rt6_info ip6_prohibit_entry_template = {
 	.rt6i_ref	= ATOMIC_INIT(1),
 };
 
-static struct rt6_info ip6_blk_hole_entry_template = {
+static const struct rt6_info ip6_blk_hole_entry_template = {
 	.dst = {
 		.__refcnt	= ATOMIC_INIT(1),
 		.__use		= 1,

^ permalink raw reply related

* Re: [v2 PATCH 2/2] netprio_cgroup: Use memcpy instead of the for-loop to copy priomap
From: David Miller @ 2012-09-12  7:49 UTC (permalink / raw)
  To: srivatsa.bhat
  Cc: nhorman, David.Laight, john.r.fastabend, gaofeng, eric.dumazet,
	mark.d.rustad, lizefan, netdev, linux-kernel
In-Reply-To: <20120912060747.11037.42623.stgit@srivatsabhat.in.ibm.com>

From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
Date: Wed, 12 Sep 2012 11:37:47 +0530

> +		memcpy(new_priomap->priomap, old_priomap->priomap,
> +			old_priomap->priomap_len *
> +					sizeof(old_priomap->priomap[0]));

This argument indentation is ridiculous.  Try:

		memcpy(new_priomap->priomap, old_priomap->priomap,
		       old_priomap->priomap_len *
		       sizeof(old_priomap->priomap[0]));

Using TABs exclusively for argument indentation is not the goal.

Rather, lining the arguments up properly so that they sit at the first
column after the first line's opening parenthesis is what you should
be trying to achieve.

And ignoring whatever stylistic convention we may or may not have, I
find it impossibly hard to believe that the code quoted above looks
good even to you.

^ permalink raw reply

* Re: [PATCH] ipv6: replace write lock with read lock when get route info
From: RongQing Li @ 2012-09-12  7:49 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1347435907.13103.693.camel@edumazet-glaptop>

>>
>
> Why dont you also change addrconf_get_prefix_route() ?
>
>

I did not find it, I will send v2
Thanks

-Roy

^ permalink raw reply

* Re: [PATCHv4] virtio-spec: virtio network device multiqueue support
From: Michael S. Tsirkin @ 2012-09-12  7:57 UTC (permalink / raw)
  To: Rusty Russell
  Cc: kvm, netdev, rick.jones2, virtualization, levinsasha928, pbonzini,
	Tom Herbert
In-Reply-To: <87har3dc4o.fsf@rustcorp.com.au>

On Wed, Sep 12, 2012 at 03:19:11PM +0930, Rusty Russell wrote:
> Jason Wang <jasowang@redhat.com> writes:
> > On 09/10/2012 02:33 PM, Michael S. Tsirkin wrote:
> >> A final addition: what you suggest above would be
> >> "TX follows RX", right?
> 
> BTW, yes.  But it's a weird way to express what the nic is doing.

It explains what the system is doing.
TX is done by driver, RX by nic.
We document both driver and device in the spec
so I thought it's fine. Any suggestions welcome.

> >> It is in anticipation of something like that, that I made
> >> steering programming so generic.
> 
> >> I think TX follows RX is more immediately useful for reasons above
> >> but we can add both to spec and let drivers and devices
> >> decide what they want to support.
> 
> You mean "RX follows TX"?  ie. accelerated RFS.  I agree.


Yes that's what I meant. Thanks for the correction.

> Perhaps Tom can explain how we avoid out-of-order receive for the
> accelerated RFS case?  It's not clear to me, but we need to be able to
> do that for virtio-net if it implements accelerated RFS.

Basically this has tx vq per cpu and relies on scheduler not bouncing threads
between cpus too aggressively. Appears to be what ixgbe does.

> > AFAIK, ixgbe does "rx follows tx". The only differences between ixgbe 
> > and virtio-net is that ixgbe driver programs the flow director during 
> > packet transmission but we suggest to do it silently in the device for 
> > simplicity.
> 
> Implying the receive queue by xmit will be slightly laggy.  Don't know
> if that's a problem.
> 
> Cheers,
> Rusty.

Doesn't seem to be a problem in Jason's testing so far.

^ permalink raw reply

* [PATCH v2] ipv6: replace write lock with read lock when get route info
From: roy.qing.li @ 2012-09-12  7:59 UTC (permalink / raw)
  To: netdev

From: Li RongQing <roy.qing.li@gmail.com>

getting route info does not write rt->rt6i_table, so replace
write lock with read lock

Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
---
 net/ipv6/addrconf.c |    4 ++--
 net/ipv6/route.c    |    8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 1237d5d..061c100 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1706,7 +1706,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 	if (table == NULL)
 		return NULL;
 
-	write_lock_bh(&table->tb6_lock);
+	read_lock_bh(&table->tb6_lock);
 	fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0);
 	if (!fn)
 		goto out;
@@ -1721,7 +1721,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
 		break;
 	}
 out:
-	write_unlock_bh(&table->tb6_lock);
+	read_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 399613b..8be1d86 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1837,7 +1837,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 	if (!table)
 		return NULL;
 
-	write_lock_bh(&table->tb6_lock);
+	read_lock_bh(&table->tb6_lock);
 	fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 	if (!fn)
 		goto out;
@@ -1853,7 +1853,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
 		break;
 	}
 out:
-	write_unlock_bh(&table->tb6_lock);
+	read_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
@@ -1896,7 +1896,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
 	if (!table)
 		return NULL;
 
-	write_lock_bh(&table->tb6_lock);
+	read_lock_bh(&table->tb6_lock);
 	for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) {
 		if (dev == rt->dst.dev &&
 		    ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
@@ -1905,7 +1905,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev
 	}
 	if (rt)
 		dst_hold(&rt->dst);
-	write_unlock_bh(&table->tb6_lock);
+	read_unlock_bh(&table->tb6_lock);
 	return rt;
 }
 
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH] netfilter/iptables: Fix log-level processing
From: Eric Dumazet @ 2012-09-12  8:07 UTC (permalink / raw)
  To: Joe Perches
  Cc: auto75914331, netfilter, coreteam, netdev, bridge, linux-kernel,
	Bart De Schuymer, netfilter-devel, Stephen Hemminger,
	Patrick McHardy, Pablo Neira Ayuso
In-Reply-To: <1347435973.2456.23.camel@joe2Laptop>

On Wed, 2012-09-12 at 00:46 -0700, Joe Perches wrote:
> auto75914331@hushmail.com reports that iptables does not correctly
> output the KERN_<level>.
> 
> $IPTABLES -A RULE_0_in  -j LOG  --log-level notice --log-prefix "DENY  in: "
> 
> result with linux 3.6-rc5
> Sep 12 06:37:29 xxxxx kernel: <5>DENY  in: IN=eth0 OUT= MAC=.......
> 
> result with linux 3.5.3 and older:
> Sep  9 10:43:01 xxxxx kernel: DENY  in: IN=eth0 OUT= MAC......
> 
> commit 04d2c8c83d0
> ("printk: convert the format for KERN_<LEVEL> to a 2 byte pattern")
> updated the syslog header style but did not update netfilter uses.
> 
> Do so.
> 
> Signed-off-by: Joe Perches <joe@perches.com>
> cc: auto75914331@hushmail.com
> ---
>  net/bridge/netfilter/ebt_log.c |    4 ++--
>  net/netfilter/xt_LOG.c         |    4 ++--
>  2 files changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
> index f88ee53..cb46d2f 100644
> --- a/net/bridge/netfilter/ebt_log.c
> +++ b/net/bridge/netfilter/ebt_log.c
> @@ -80,8 +80,8 @@ ebt_log_packet(u_int8_t pf, unsigned int hooknum,
>  	unsigned int bitmask;
>  
>  	spin_lock_bh(&ebt_log_lock);
> -	printk("<%c>%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
> -	       '0' + loginfo->u.log.level, prefix,
> +	printk("%c%c%s IN=%s OUT=%s MAC source = %pM MAC dest = %pM proto = 0x%04x",
> +	       KERN_SOH_ASCII, '0' + loginfo->u.log.level, prefix,
>  	       in ? in->name : "", out ? out->name : "",
>  	       eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest,
>  	       ntohs(eth_hdr(skb)->h_proto));
> diff --git a/net/netfilter/xt_LOG.c b/net/netfilter/xt_LOG.c
> index ff5f75f..bdc5352 100644
> --- a/net/netfilter/xt_LOG.c
> +++ b/net/netfilter/xt_LOG.c
> @@ -436,8 +436,8 @@ log_packet_common(struct sbuff *m,
>  		  const struct nf_loginfo *loginfo,
>  		  const char *prefix)
>  {
> -	sb_add(m, "<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
> -	       prefix,
> +	sb_add(m, "%c%c%sIN=%s OUT=%s ",
> +	       KERN_SOH_ASCII, '0' + loginfo->u.log.level, prefix,
>  	       in ? in->name : "",
>  	       out ? out->name : "");
>  #ifdef CONFIG_BRIDGE_NETFILTER
> 

would be better to avoid the %c

->

 sb_add(m, KERN_SOH "%c%sIN=%s OUT=%s ",
	'0' + loginfo->u.log.level, prefix,

^ permalink raw reply

* Re: [PATCH v2] ipv6: replace write lock with read lock when get route info
From: Eric Dumazet @ 2012-09-12  8:12 UTC (permalink / raw)
  To: roy.qing.li; +Cc: netdev
In-Reply-To: <1347436741-344-1-git-send-email-roy.qing.li@gmail.com>

On Wed, 2012-09-12 at 15:59 +0800, roy.qing.li@gmail.com wrote:
> From: Li RongQing <roy.qing.li@gmail.com>
> 
> geting route info does not write rt->rt6i_table, so replace
> write lock with read lock
> 
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Li RongQing <roy.qing.li@gmail.com>
> ---
>  net/ipv6/addrconf.c |    4 ++--
>  net/ipv6/route.c    |    8 ++++----
>  2 files changed, 6 insertions(+), 6 deletions(-)

I guess you missed the net-next tag in your [PATCH ...] ?
 
Signed-off-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply

* Re: [v2 PATCH 2/2] netprio_cgroup: Use memcpy instead of the for-loop to copy priomap
From: Srivatsa S. Bhat @ 2012-09-12  8:24 UTC (permalink / raw)
  To: David Miller
  Cc: nhorman, David.Laight, john.r.fastabend, gaofeng, eric.dumazet,
	mark.d.rustad, lizefan, netdev, linux-kernel
In-Reply-To: <20120912.034901.184817520125489015.davem@davemloft.net>

On 09/12/2012 01:19 PM, David Miller wrote:
> From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
> Date: Wed, 12 Sep 2012 11:37:47 +0530
> 
>> +		memcpy(new_priomap->priomap, old_priomap->priomap,
>> +			old_priomap->priomap_len *
>> +					sizeof(old_priomap->priomap[0]));
> 
> This argument indentation is ridiculous.  Try:
> 
> 		memcpy(new_priomap->priomap, old_priomap->priomap,
> 		       old_priomap->priomap_len *
> 		       sizeof(old_priomap->priomap[0]));
> 
> Using TABs exclusively for argumentat indentation is not the goal.
> 
> Rather, lining the arguments up properly so that they sit at the first
> column after the first line's openning parenthesis is what you should
> be trying to achieve.

OK, will fix it, thanks!

> 
> And ignoring whatever stylistic convention we may or may not have, I
> find it impossibly hard to believe that the code quoted above looks
> good even to you.
>

On second thoughts, I think the memcpy in this case will actually be worse
since it will copy the contents in chunks of smaller size than the for-loop.

Or, did you mean to say that this code is plain wrong for some reason?

Regards,
Srivatsa S. Bhat

^ permalink raw reply

* [RFC PATCH net-next 0/1] Add support of ECMPv6
From: Nicolas Dichtel @ 2012-09-12  8:29 UTC (permalink / raw)
  To: bernat, netdev, yoshfuji, davem
In-Reply-To: <87a9x3vxzp.fsf@guybrush.luffy.cx>

Here is a proposal to add support for ECMPv6. The previous patch
from Vincent against iproute2 can be used, but another small patch is needed
too:

diff --git a/ip/iproute.c b/ip/iproute.c
index 2fe44b3..b71f150 100644
--- a/ip/iproute.c
+++ b/ip/iproute.c
@@ -693,8 +693,10 @@ int parse_nexthops(struct nlmsghdr *n, struct rtmsg *r, int argc, char **argv)
 		rtnh = RTNH_NEXT(rtnh);
 	}
 
-	if (rta->rta_len > RTA_LENGTH(0))
+	if (rta->rta_len > RTA_LENGTH(0)) {
 		addattr_l(n, 1024, RTA_MULTIPATH, RTA_DATA(rta), RTA_PAYLOAD(rta));
+		n->nlmsg_flags &= ~NLM_F_EXCL;
+	}
 	return 0;
 }
 

If the kernel patch is approved, I will formally submit the patch for
iproute2.

Here is an example of a command to add an ECMP route:
$ ip -6 route add 3ffe:304:124:2306::/64 \
	nexthop via fe80::230:1bff:feb4:e05c dev eth0 weight 1 \
	nexthop via fe80::230:1bff:feb4:dd4f dev eth0 weight 1

Comments are welcome.

Regards,
Nicolas

^ permalink raw reply related

* [RFC PATCH net-next 1/1] ipv6: add support of ECMP
From: Nicolas Dichtel @ 2012-09-12  8:29 UTC (permalink / raw)
  To: bernat, netdev, yoshfuji, davem; +Cc: Nicolas Dichtel
In-Reply-To: <1347438597-4233-1-git-send-email-nicolas.dichtel@6wind.com>

This patch adds the support of equal cost multipath for IPv6.

The patch is based on a previous work from
Luc Saillard <luc.saillard@6wind.com>.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
---
 include/net/ip6_fib.h |  13 ++++
 net/ipv6/Kconfig      |  32 ++++++++
 net/ipv6/ip6_fib.c    |  73 ++++++++++++++++++
 net/ipv6/route.c      | 207 +++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 322 insertions(+), 3 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cd64cf3..8071c66 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -47,6 +47,10 @@ struct fib6_config {
 	unsigned long	fc_expires;
 	struct nlattr	*fc_mx;
 	int		fc_mx_len;
+#ifdef CONFIG_IPV6_MULTIPATH
+	struct nlattr	*fc_mp;
+	int		fc_mp_len;
+#endif
 
 	struct nl_info	fc_nlinfo;
 };
@@ -98,6 +102,15 @@ struct rt6_info {
 	struct fib6_node		*rt6i_node;
 
 	struct in6_addr			rt6i_gateway;
+#ifdef CONFIG_IPV6_MULTIPATH
+	/*
+	 * siblings is a list of rt6_info that have the same metric/weight,
+	 * destination, but not the same gateway. nsiblings is just a cache
+	 * to speed up lookup.
+	 */
+	unsigned int                    rt6i_nsiblings;
+	struct list_head                rt6i_siblings;
+#endif
 
 	atomic_t			rt6i_ref;
 
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 4f7fe72..5980aec 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -266,4 +266,36 @@ config IPV6_PIMSM_V2
 	  Support for IPv6 PIM multicast routing protocol PIM-SMv2.
 	  If unsure, say N.
 
+config IPV6_MULTIPATH
+	bool "IPv6: equal cost multipath for IPv6 routing"
+	depends on IPV6
+	default y
+	---help---
+	  Enable this option to support ECMP for IPv6.
+	  If unsure, say N.
+
+choice
+	prompt "IPv6: choose Multipath algorithm"
+	depends on IPV6_MULTIPATH
+	default IPV6_MULTIPATH_ROUTE
+	---help---
+	  Define the method to select route between each possible path.
+
+	config IPV6_MULTIPATH_ROUTE
+	bool "IPv6: MULTIPATH flow algorithm"
+	---help---
+	  Multipath routes are chosen according to hash of packet header to
+	  ensure a flow keeps the same route.
+
+	config IPV6_MULTIPATH_RR
+	bool "IPv6: MULTIPATH round robin algorithm"
+	---help---
+	  Multipath routes are chosen according to Round Robin.
+
+	config IPV6_MULTIPATH_RANDOM
+	bool "IPv6: MULTIPATH random algorithm"
+	---help---
+	  Multipath routes are chosen in a random fashion.
+endchoice
+
 endif # IPV6
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 13690d6..3541e44 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -672,6 +672,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			    iter->rt6i_idev == rt->rt6i_idev &&
 			    ipv6_addr_equal(&iter->rt6i_gateway,
 					    &rt->rt6i_gateway)) {
+#ifdef CONFIG_IPV6_MULTIPATH
+				if (rt->rt6i_nsiblings)
+					rt->rt6i_nsiblings = 0;
+#endif
 				if (!(iter->rt6i_flags & RTF_EXPIRES))
 					return -EEXIST;
 				if (!(rt->rt6i_flags & RTF_EXPIRES))
@@ -680,6 +684,23 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 					rt6_set_expires(iter, rt->dst.expires);
 				return -EEXIST;
 			}
+#ifdef CONFIG_IPV6_MULTIPATH
+			/* If we have the same destination and the same metric,
+			 * but not the same gateway, then the route we try to
+			 * add is sibling to this route, increment our counter
+			 * of siblings, and later we will add our route to the
+			 * list.
+			 * Only static routes (which don't have flag
+			 * RTF_EXPIRES) are used for ECMPv6.
+			 *
+			 * To avoid a long list, we only add siblings if the
+			 * route has a gateway.
+			 */
+			if (rt->rt6i_flags & RTF_GATEWAY &&
+			    !(rt->rt6i_flags & RTF_EXPIRES) &&
+			    !(iter->rt6i_flags & RTF_EXPIRES))
+				rt->rt6i_nsiblings++;
+#endif
 		}
 
 		if (iter->rt6i_metric > rt->rt6i_metric)
@@ -692,6 +713,43 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 	if (ins == &fn->leaf)
 		fn->rr_ptr = NULL;
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	/* Link this route to others same route. */
+	if (rt->rt6i_nsiblings) {
+		unsigned int rt6i_nsiblings;
+		struct rt6_info *sibling, *temp_sibling;
+
+		/* Find the first route that have the same metric */
+		sibling = fn->leaf;
+		while (sibling) {
+			if (sibling->rt6i_metric == rt->rt6i_metric) {
+				list_add_tail(&rt->rt6i_siblings,
+					      &sibling->rt6i_siblings);
+				break;
+			}
+			sibling = sibling->dst.rt6_next;
+		}
+		/* For each sibling in the list, increment the counter of
+		 * siblings. We can check if all the counter are equal.
+		 */
+		rt6i_nsiblings = 0;
+		list_for_each_entry_safe(sibling, temp_sibling,
+					 &rt->rt6i_siblings,
+					 rt6i_siblings) {
+			sibling->rt6i_nsiblings++;
+			if (unlikely(sibling->rt6i_nsiblings !=
+				     rt->rt6i_nsiblings)) {
+				pr_err("Wrong number of siblings for route %p (%d)\n",
+				       sibling, sibling->rt6i_nsiblings);
+			}
+			rt6i_nsiblings++;
+		}
+		if (unlikely(rt6i_nsiblings != rt->rt6i_nsiblings)) {
+			pr_err("Wrong number of siblings for route %p. I have %d routes, but count %d siblings\n",
+			       rt, rt6i_nsiblings, rt->rt6i_nsiblings);
+		}
+	}
+#endif
 	/*
 	 *	insert node
 	 */
@@ -1197,6 +1255,21 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
 	if (fn->rr_ptr == rt)
 		fn->rr_ptr = NULL;
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	/* Remove this entry from other siblings */
+	if (rt->rt6i_nsiblings) {
+		struct rt6_info *sibling, *next_sibling;
+
+		/* For each siblings, decrement the counter of siblings */
+		list_for_each_entry_safe(sibling, next_sibling,
+					 &rt->rt6i_siblings, rt6i_siblings) {
+			sibling->rt6i_nsiblings--;
+		}
+		rt->rt6i_nsiblings = 0;
+		list_del_init(&rt->rt6i_siblings);
+	}
+#endif
+
 	/* Adjust walkers */
 	read_lock(&fib6_walker_lock);
 	FOR_WALKERS(w) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 399613b..563d671 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -57,6 +57,9 @@
 #include <net/xfrm.h>
 #include <net/netevent.h>
 #include <net/netlink.h>
+#ifdef CONFIG_IPV6_MULTIPATH
+#include <net/nexthop.h>
+#endif
 
 #include <asm/uaccess.h>
 
@@ -288,6 +291,10 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 
 		memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst));
 		rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers);
+#ifdef CONFIG_IPV6_MULTIPATH
+		INIT_LIST_HEAD(&rt->rt6i_siblings);
+		rt->rt6i_nsiblings = 0;
+#endif
 	}
 	return rt;
 }
@@ -388,6 +395,122 @@ static bool rt6_need_strict(const struct in6_addr *daddr)
 		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK);
 }
 
+#ifdef CONFIG_IPV6_MULTIPATH
+/*
+ *	Multipath route selection.
+ */
+
+#ifdef CONFIG_IPV6_MULTIPATH_RANDOM
+/*
+ * Pseudo random candidate function
+ */
+static int rt6_info_hash_randomfn(unsigned int candidate_count)
+{
+	return random32() % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_RR
+/*
+ * Fake Round Robin candidate function
+ * If we want real RR, we need to add a counter in each route
+ */
+static int rt6_info_hash_falserr(unsigned int candidate_count)
+{
+	static unsigned int seed;
+	seed++;
+	return seed % candidate_count;
+}
+#endif
+
+#ifdef CONFIG_IPV6_MULTIPATH_ROUTE
+/*
+ * Pseudo random candidate using the src port, and other information
+ * Adapted from fib_info_hashfn()
+ */
+static int rt6_info_hash_nhsfn(unsigned int candidate_count,
+			       const struct flowi6 *fl6)
+{
+	unsigned int val = fl6->flowi6_proto;
+
+	val ^= fl6->daddr.s6_addr32[0];
+	val ^= fl6->daddr.s6_addr32[1];
+	val ^= fl6->daddr.s6_addr32[2];
+	val ^= fl6->daddr.s6_addr32[3];
+
+	val ^= fl6->saddr.s6_addr32[0];
+	val ^= fl6->saddr.s6_addr32[1];
+	val ^= fl6->saddr.s6_addr32[2];
+	val ^= fl6->saddr.s6_addr32[3];
+
+	/* Work only if this not encapsulated */
+	switch (fl6->flowi6_proto) {
+	case IPPROTO_UDP:
+	case IPPROTO_TCP:
+	case IPPROTO_SCTP:
+		val ^= fl6->fl6_sport;
+		val ^= fl6->fl6_dport;
+		break;
+
+	case IPPROTO_ICMPV6:
+		val ^= fl6->fl6_icmp_type;
+		val ^= fl6->fl6_icmp_code;
+		break;
+	}
+
+	/* Perhaps, we need to tune, this function? */
+	val = val ^ (val >> 7) ^ (val >> 12);
+	return val % candidate_count;
+}
+#endif
+
+/*
+ * This function return an index used to select (at random, round robin, ...)
+ * a route between any siblings.
+ *
+ * Note: fl6 can be NULL
+ */
+static unsigned int rt6_info_hashfn(const struct rt6_info *rt,
+				    const struct flowi6 *fl6)
+{
+	int candidate_count = rt->rt6i_nsiblings + 1;
+
+#if defined(CONFIG_IPV6_MULTIPATH_RR)
+	return rt6_info_hash_falserr(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_RANDOM)
+	return rt6_info_hash_randomfn(candidate_count);
+#elif defined(CONFIG_IPV6_MULTIPATH_ROUTE)
+	if (fl6 == NULL)
+		return 0;
+	return rt6_info_hash_nhsfn(candidate_count, fl6);
+#else
+	return 0;
+#endif
+}
+
+static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+					     struct flowi6 *fl6)
+{
+	struct rt6_info *sibling, *next_sibling;
+	int route_choosen;
+
+	route_choosen = rt6_info_hashfn(match, fl6);
+	/* Don't change the route, if route_choosen == 0
+	 * (siblings does not include ourself)
+	 */
+	if (route_choosen)
+		list_for_each_entry_safe(sibling, next_sibling,
+				&match->rt6i_siblings, rt6i_siblings) {
+			route_choosen--;
+			if (route_choosen == 0) {
+				match = sibling;
+				break;
+			}
+		}
+	return match;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
 /*
  *	Route lookup. Any table->tb6_lock is implied.
  */
@@ -705,6 +828,10 @@ static struct rt6_info *ip6_pol_route_lookup(struct net *net,
 restart:
 	rt = fn->leaf;
 	rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
+		rt = rt6_multipath_select(rt, fl6);
+#endif
 	BACKTRACK(net, &fl6->saddr);
 out:
 	dst_use(&rt->dst, jiffies);
@@ -866,7 +993,10 @@ restart_2:
 
 restart:
 	rt = rt6_select(fn, oif, strict | reachable);
-
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (rt->rt6i_nsiblings && oif == 0)
+		rt = rt6_multipath_select(rt, fl6);
+#endif
 	BACKTRACK(net, &fl6->saddr);
 	if (rt == net->ipv6.ip6_null_entry ||
 	    rt->rt6i_flags & RTF_CACHE)
@@ -2247,6 +2377,9 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_IIF]		= { .type = NLA_U32 },
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
+#ifdef CONFIG_IPV6_MULTIPATH
+	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+#endif
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2324,11 +2457,69 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 	if (tb[RTA_TABLE])
 		cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
 
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (tb[RTA_MULTIPATH]) {
+		cfg->fc_mp = nla_data(tb[RTA_MULTIPATH]);
+		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
+	}
+#endif
+
 	err = 0;
 errout:
 	return err;
 }
 
+#ifdef CONFIG_IPV6_MULTIPATH
+static int ip6_route_multipath(struct fib6_config *cfg, int add)
+{
+	struct fib6_config r_cfg;
+	struct rtnexthop *rtnh;
+	int remaining;
+	int attrlen;
+	int err = 0, last_err = 0;
+
+beginning:
+	rtnh = (struct rtnexthop *)cfg->fc_mp;
+	remaining = cfg->fc_mp_len;
+
+	/* Parse a Multipath Entry */
+	while (rtnh_ok(rtnh, remaining)) {
+		memcpy(&r_cfg, cfg, sizeof(*cfg));
+		if (rtnh->rtnh_ifindex)
+			r_cfg.fc_ifindex = rtnh->rtnh_ifindex;
+
+		attrlen = rtnh_attrlen(rtnh);
+		if (attrlen > 0) {
+			struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
+
+			nla = nla_find(attrs, attrlen, RTA_GATEWAY);
+			if (nla) {
+				nla_memcpy(&r_cfg.fc_gateway, nla, 16);
+				r_cfg.fc_flags |= RTF_GATEWAY;
+			}
+		}
+		err = add ? ip6_route_add(&r_cfg) : ip6_route_del(&r_cfg);
+		if (err) {
+			last_err = err;
+			/* If we are trying to remove a route, do not stop the
+			 * loop when ip6_route_del() fails (because next hop is
+			 * already gone), we should try to remove all next hops.
+			 */
+			if (add) {
+				/* If add fails, we should try to delete all
+				 * next hops that have been already added.
+				 */
+				add = 0;
+				goto beginning;
+			}
+		}
+		rtnh = rtnh_next(rtnh, &remaining);
+	}
+
+	return last_err;
+}
+#endif /* CONFIG_IPV6_MULTIPATH */
+
 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 {
 	struct fib6_config cfg;
@@ -2338,7 +2529,12 @@ static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 	if (err < 0)
 		return err;
 
-	return ip6_route_del(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 0);
+	else
+#endif
+		return ip6_route_del(&cfg);
 }
 
 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
@@ -2350,7 +2546,12 @@ static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a
 	if (err < 0)
 		return err;
 
-	return ip6_route_add(&cfg);
+#ifdef CONFIG_IPV6_MULTIPATH
+	if (cfg.fc_mp)
+		return ip6_route_multipath(&cfg, 1);
+	else
+#endif
+		return ip6_route_add(&cfg);
 }
 
 static inline size_t rt6_nlmsg_size(void)
-- 
1.7.12

^ permalink raw reply related

* Re: [PATCH NEXT] rtlwifi: rtl8192c: rtl8192ce: Add support for B-CUT version of RTL8188CE
From: Anisse Astier @ 2012-09-12  8:37 UTC (permalink / raw)
  To: Larry Finger
  Cc: linville-2XuSBdqkA4R54TAoqtyWWQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA, Li Chaoming
In-Reply-To: <1347393962-7661-1-git-send-email-Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>

On Tue, 11 Sep 2012 15:06:02 -0500, Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org> wrote :

> Realtek devices with designation RTL8188CE-VL have the so-called B-cut
> of the wireless chip. This patch adds the special programming needed by
> these devices.
> 
> Signed-off-by: Larry Finger <Larry.Finger-tQ5ms3gMjBLk1uMJSBkQmQ@public.gmane.org>
> Cc: Anisse Astier <anisse-fwwRqrJYcP2HXe+LvDLADg@public.gmane.org>
> Cc: Li Chaoming <chaoming_li-kXabqFNEczNtrwSWzY7KCg@public.gmane.org>

Tested-by: Anisse Astier <anisse-fwwRqrJYcP2HXe+LvDLADg@public.gmane.org>

Fixes both problems I reported:
 - kernel freezing/lockup when fwlps=1 (default)
 - card not working, then freeze, even if fwlps=0.


> ---
>  drivers/net/wireless/rtlwifi/rtl8192c/phy_common.c |   21 +++++++
>  drivers/net/wireless/rtlwifi/rtl8192ce/def.h       |    3 +
>  drivers/net/wireless/rtlwifi/rtl8192ce/hw.c        |   61 ++++++++++++++++++--
>  drivers/net/wireless/rtlwifi/rtl8192ce/phy.c       |    4 +-
>  drivers/net/wireless/rtlwifi/rtl8192ce/sw.c        |    6 +-
>  drivers/net/wireless/rtlwifi/rtl8192ce/trx.c       |    4 +-
>  6 files changed, 87 insertions(+), 12 deletions(-)
> ---
> 
> John,
> 
> This patch has the patch entitled "rtlwifi: rtl8192ce: Log message that
> B_CUT device may not work" as a pre-requisite. Unlike the previous patch,
> this one is too invasive to backport to the stable kernels, thus it should
> be applied to 3.7.
> 
> Thanks,
> 
> Larry
> ---
> 

[snip]

> diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> index 86d73b3..bae5269 100644
> --- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> +++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
> @@ -896,7 +896,6 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
>  	struct rtl_phy *rtlphy = &(rtlpriv->phy);
>  	struct rtl_pci *rtlpci = rtl_pcidev(rtl_pcipriv(hw));
>  	struct rtl_ps_ctl *ppsc = rtl_psc(rtl_priv(hw));
> -	static bool iqk_initialized; /* initialized to false */
>  	bool rtstatus = true;
>  	bool is92c;
>  	int err;
> @@ -921,9 +920,28 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
>  
>  	rtlhal->last_hmeboxnum = 0;
>  	rtl92c_phy_mac_config(hw);
> +	/* because last function modify RCR, so we update
> +	 * rcr var here, or TP will unstable for receive_config
> +	 * is wrong, RX RCR_ACRC32 will cause TP unstabel & Rx
> +	 * RCR_APP_ICV will cause mac80211 unassoc for cisco 1252*/
> +	rtlpci->receive_config = rtl_read_dword(rtlpriv, REG_RCR);
> +	rtlpci->receive_config &= ~(RCR_ACRC32 | RCR_AICV);
> +	rtl_write_dword(rtlpriv, REG_RCR, rtlpci->receive_config);
>  	rtl92c_phy_bb_config(hw);
>  	rtlphy->rf_mode = RF_OP_BY_SW_3WIRE;
>  	rtl92c_phy_rf_config(hw);
> +	if (IS_VENDOR_UMC_A_CUT(rtlhal->version) &&
> +	    !IS_92C_SERIAL(rtlhal->version)) {
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G1, MASKDWORD, 0x30255);
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_RX_G2, MASKDWORD, 0x50a00);
> +	} else if (IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version)) {
> +		rtl_set_rfreg(hw, RF90_PATH_A, 0x0C, MASKDWORD, 0x894AE);
> +		rtl_set_rfreg(hw, RF90_PATH_A, 0x0A, MASKDWORD, 0x1AF31);
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_IPA, MASKDWORD, 0x8F425);
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_SYN_G2, MASKDWORD, 0x4F200);
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_RCK1, MASKDWORD, 0x44053);
> +		rtl_set_rfreg(hw, RF90_PATH_A, RF_RCK2, MASKDWORD, 0x80201);
> +	}
>  	rtlphy->rfreg_chnlval[0] = rtl_get_rfreg(hw, (enum radio_path)0,
>  						 RF_CHNLBW, RFREG_OFFSET_MASK);
>  	rtlphy->rfreg_chnlval[1] = rtl_get_rfreg(hw, (enum radio_path)1,
> @@ -945,11 +963,11 @@ int rtl92ce_hw_init(struct ieee80211_hw *hw)
>  
>  	if (ppsc->rfpwr_state == ERFON) {
>  		rtl92c_phy_set_rfpath_switch(hw, 1);
> -		if (iqk_initialized) {
> +		if (rtlphy->iqk_initialized) {
>  			rtl92c_phy_iq_calibrate(hw, true);
>  		} else {
>  			rtl92c_phy_iq_calibrate(hw, false);
> -			iqk_initialized = true;
> +			rtlphy->iqk_initialized = true;
>  		}
>  
>  		rtl92c_dm_check_txpower_tracking(hw);
> @@ -1004,6 +1022,13 @@ static enum version_8192c _rtl92ce_read_chip_version(struct ieee80211_hw *hw)
>  				   ? CHIP_VENDOR_UMC_B_CUT : CHIP_UNKNOWN) |
>  				   CHIP_VENDOR_UMC));
>  		}
> +		if (IS_92C_SERIAL(version)) {
> +			value32 = rtl_read_dword(rtlpriv, REG_HPON_FSM);
> +			version = (enum version_8192c)(version |
> +				   ((CHIP_BONDING_IDENTIFIER(value32)
> +				   == CHIP_BONDING_92C_1T2R) ?
> +				   RF_TYPE_1T2R : 0));
> +		}
>  	}
>  
>  	switch (version) {
> @@ -1019,12 +1044,30 @@ static enum version_8192c _rtl92ce_read_chip_version(struct ieee80211_hw *hw)
>  	case VERSION_A_CHIP_88C:
>  		versionid = "A_CHIP_88C";
>  		break;
> +	case VERSION_NORMAL_UMC_CHIP_92C_1T2R_A_CUT:
> +		versionid = "A_CUT_92C_1T2R";
> +		break;
> +	case VERSION_NORMAL_UMC_CHIP_92C_A_CUT:
> +		versionid = "A_CUT_92C";
> +		break;
> +	case VERSION_NORMAL_UMC_CHIP_88C_A_CUT:
> +		versionid = "A_CUT_88C";
> +		break;
> +	case VERSION_NORMAL_UMC_CHIP_92C_1T2R_B_CUT:
> +		versionid = "B_CUT_92C_1T2R";
> +		break;
> +	case VERSION_NORMAL_UMC_CHIP_92C_B_CUT:
> +		versionid = "B_CUT_92C";
> +		break;
> +	case VERSION_NORMAL_UMC_CHIP_88C_B_CUT:
> +		versionid = "B_CUT_88C";
> +		break;
>  	default:
>  		versionid = "Unknown. Bug?";
>  		break;
>  	}
>  
> -	RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
> +	RT_TRACE(rtlpriv, COMP_INIT, DBG_EMERG,
>  		 "Chip Version ID: %s\n", versionid);
>  
>  	switch (version & 0x3) {
> @@ -1197,6 +1240,7 @@ static void _rtl92ce_poweroff_adapter(struct ieee80211_hw *hw)
>  {
>  	struct rtl_priv *rtlpriv = rtl_priv(hw);
>  	struct rtl_pci_priv *rtlpcipriv = rtl_pcipriv(hw);
> +	struct rtl_hal *rtlhal = rtl_hal(rtlpriv);
>  	u8 u1b_tmp;
>  	u32 u4b_tmp;
>  
> @@ -1225,7 +1269,8 @@ static void _rtl92ce_poweroff_adapter(struct ieee80211_hw *hw)
>  	rtl_write_word(rtlpriv, REG_GPIO_IO_SEL, 0x0790);
>  	rtl_write_word(rtlpriv, REG_LEDCFG0, 0x8080);
>  	rtl_write_byte(rtlpriv, REG_AFE_PLL_CTRL, 0x80);
> -	rtl_write_byte(rtlpriv, REG_SPS0_CTRL, 0x23);
> +	if (!IS_81xxC_VENDOR_UMC_B_CUT(rtlhal->version))
> +		rtl_write_byte(rtlpriv, REG_SPS0_CTRL, 0x23);
>  	if (rtlpcipriv->bt_coexist.bt_coexistence) {
>  		u4b_tmp = rtl_read_dword(rtlpriv, REG_AFE_XTAL_CTRL);
>  		u4b_tmp |= 0x03824800;
> @@ -1254,6 +1299,9 @@ void rtl92ce_card_disable(struct ieee80211_hw *hw)
>  		rtlpriv->cfg->ops->led_control(hw, LED_CTL_POWER_OFF);
>  	RT_SET_PS_LEVEL(ppsc, RT_RF_OFF_LEVL_HALT_NIC);
>  	_rtl92ce_poweroff_adapter(hw);
> +
> +	/* after power off we should do iqk again */
> +	rtlpriv->phy.iqk_initialized = false;
>  }
>  
>  void rtl92ce_interrupt_recognized(struct ieee80211_hw *hw,
> @@ -1912,6 +1960,8 @@ static void rtl92ce_update_hal_rate_mask(struct ieee80211_hw *hw,
>  			ratr_bitmap &= 0x0f0ff0ff;
>  		break;
>  	}
> +	sta_entry->ratr_index = ratr_index;
> +
>  	RT_TRACE(rtlpriv, COMP_RATR, DBG_DMESG,
>  		 "ratr_bitmap :%x\n", ratr_bitmap);
>  	*(u32 *)&rate_mask = (ratr_bitmap & 0x0fffffff) |
> @@ -2291,3 +2341,4 @@ void rtl92ce_suspend(struct ieee80211_hw *hw)
>  void rtl92ce_resume(struct ieee80211_hw *hw)
>  {
>  }
> +
Whitespace.


Reviewed-by: Anisse Astier <anisse-fwwRqrJYcP2HXe+LvDLADg@public.gmane.org>

Regards,

Anisse
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC PATCH net-next 1/1] ipv6: add support of ECMP
From: YOSHIFUJI Hideaki @ 2012-09-12  8:48 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: bernat, netdev, davem, YOSHIFUJI Hideaki
In-Reply-To: <1347438597-4233-2-git-send-email-nicolas.dichtel@6wind.com>

Hello.

Nicolas Dichtel wrote:
> This patch adds the support of equal cost multipath for IPv6.
> 
> The patch is based on a previous work from
> Luc Saillard <luc.saillard@6wind.com>.
> 
> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
:
> +config IPV6_MULTIPATH
> +	bool "IPv6: equal cost multipath for IPv6 routing"
> +	depends on IPV6
> +	default y
> +	---help---
> +	  Enable this option to support ECMP for IPv6.
> +	  If unsure, say N.
> +
> +choice
> +	prompt "IPv6: choose Multipath algorithm"
> +	depends on IPV6_MULTIPATH
> +	default IPV6_MULTIPATH_ROUTE
> +	---help---
> +	  Define the method to select route between each possible path.
> +
> +	config IPV6_MULTIPATH_ROUTE
> +	bool "IPv6: MULTIPATH flow algorithm"
> +	---help---
> +	  Multipath routes are chosen according to hash of packet header to
> +	  ensure a flow keeps the same route.
> +
> +	config IPV6_MULTIPATH_RR
> +	bool "IPv6: MULTIPATH round robin algorithm"
> +	---help---
> +	  Multipath routes are chosen according to Round Robin.
> +
> +	config IPV6_MULTIPATH_RANDOM
> +	bool "IPv6: MULTIPATH random algorithm"
> +	---help---
> +	  Multipath routes are chosen in a random fashion.
> +endchoice

We should use hash-based algorithm by default,
according to RFC4311.  See also RFC6438.

Regards,

--yoshfuji

^ permalink raw reply

* Re: GRO aggregation
From: Shlomo Pongartz @ 2012-09-12  9:23 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev@vger.kernel.org
In-Reply-To: <1347392132.13103.663.camel@edumazet-glaptop>

On 9/11/2012 10:35 PM, Eric Dumazet wrote:
> On Tue, 2012-09-11 at 19:24 +0000, Shlomo Pongratz wrote:
>
>> I see that in ixgbe the weight for the NAPI is 64 (netif_napi_add). So
>> if packets are arriving in high rate then an the CPU is fast enough to
>> collect the packets as they arrive, assuming packets continue to
>> arrives while the NAPI runs. Then it should have aggregate more. So we
>> will have less passes trough the stack.
>>
> As I said, _if_ your cpu was loaded by other stuff, then you would see
> biggest GRO packets.
>
> GRO is not : "We want to kill latency and have big packets just because
> its better"
>
> Its more like : If load is big enough, try to aggregate TCP frames in
> less skbs.
>
>
>
>
First I want to apologize for breaking the mailing thread. I wasn't at 
work and used webmail.

I agree with you, but I think that something is still strange.
On the transmitter side all the offloads are enabled, e.g. TSO and GSO.
The tcpdump on the sender side shows size of 64240 which is 44 packets 
of 1460 each.
Now since the offloading are enabled the HW should transmit 44 frames 
back to back,
that is in a burst of 44 * 1500 bytes, which according to my calculation 
should take 52.8 micro on 10G Ethernet.
Using ethtool I've set the rx-usecs to 1022 micro, which I think is the 
maximal value for ixgbe.
Note that there is no way to set rx-frames on ixgbe.
Now since the ixgbe weight is 64 I expected that the NAPI would be able 
to poll for more than 21 packets,
since 44 packets came in one burst.
However, the results remain the same.

Shlomo.

^ permalink raw reply

* Re: GRO aggregation
From: Eric Dumazet @ 2012-09-12  9:33 UTC (permalink / raw)
  To: Shlomo Pongartz; +Cc: netdev@vger.kernel.org
In-Reply-To: <505054AE.9040901@mellanox.com>

On Wed, 2012-09-12 at 12:23 +0300, Shlomo Pongartz wrote:
> On 9/11/2012 10:35 PM, Eric Dumazet wrote:
> > On Tue, 2012-09-11 at 19:24 +0000, Shlomo Pongratz wrote:
> >
> >> I see that in ixgbe the weight for the NAPI is 64 (netif_napi_add). So
> >> if packets are arriving in high rate then an the CPU is fast enough to
> >> collect the packets as they arrive, assuming packets continue to
> >> arrives while the NAPI runs. Then it should have aggregate more. So we
> >> will have less passes trough the stack.
> >>
> > As I said, _if_ your cpu was loaded by other stuff, then you would see
> > biggest GRO packets.
> >
> > GRO is not : "We want to kill latency and have big packets just because
> > its better"
> >
> > Its more like : If load is big enough, try to aggregate TCP frames in
> > less skbs.
> >
> >
> >
> >
> First I want to apologize for breaking the mailing thread. I wasn't at 
> work and used webmail.
> 
> I agree with your but I think that something is still strange.
> On the transmitter side all the offloading are enabled, e.g. TSO and GSO.
> The tcpdump on the sender side shows size of 64240 which is 44 packets 
> of 1460 each.
> Now since the offloading are enabled the HW should transmit 44 frames 
> back to back,
> that is in a burst of 44 * 1500 bytes, which according to my calculation 
> should take 52.8 micro on 10G Ethernet.
> Using ethtool I've set the rx-usecs to 1022 micro, which I think is the 
> maximal value for ixgbe.
> Note that there is no way to set rx-frames on ixgbe.
> Now since the ixgbe weight is 64 I expected that the NAPI will be able 
> to poll for more then 21 packets,
> since 44 packets came in one burst.
> However the results remains the same.

TSO uses PAGE frags, so 64KB needs about 16 pages.

tcp_sendmsg() could even use order-3 pages, so that only 2 pages would
be needed to fill 64KB of data.

GRO uses whatever fragment size provided by NIC, depending on MTU.

One skb has a limit on number of frags.

Handling a huge array of frags would be actually slower in some helper
functions.

Since you don't exactly describe why you ask all these questions, it's
hard to guess what problem you are trying to solve.

^ permalink raw reply

* Re: [RFC PATCH net-next 1/1] ipv6: add support of ECMP
From: YOSHIFUJI Hideaki @ 2012-09-12  9:42 UTC (permalink / raw)
  To: Nicolas Dichtel; +Cc: YOSHIFUJI Hideaki, bernat, netdev, davem
In-Reply-To: <50504C72.1090500@linux-ipv6.org>

Hello.

YOSHIFUJI Hideaki wrote:
> Hello.
> 
> Nicolas Dichtel wrote:
>> This patch adds the support of equal cost multipath for IPv6.
>>
>> The patch is based on a previous work from
>> Luc Saillard <luc.saillard@6wind.com>.
>>
>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
> :
>> +config IPV6_MULTIPATH
>> +	bool "IPv6: equal cost multipath for IPv6 routing"
>> +	depends on IPV6
>> +	default y
>> +	---help---
>> +	  Enable this option to support ECMP for IPv6.
>> +	  If unsure, say N.
>> +
>> +choice
>> +	prompt "IPv6: choose Multipath algorithm"
>> +	depends on IPV6_MULTIPATH
>> +	default IPV6_MULTIPATH_ROUTE
>> +	---help---
>> +	  Define the method to select route between each possible path.
>> +
>> +	config IPV6_MULTIPATH_ROUTE
>> +	bool "IPv6: MULTIPATH flow algorithm"
>> +	---help---
>> +	  Multipath routes are chosen according to hash of packet header to
>> +	  ensure a flow keeps the same route.
>> +
>> +	config IPV6_MULTIPATH_RR
>> +	bool "IPv6: MULTIPATH round robin algorithm"
>> +	---help---
>> +	  Multipath routes are chosen according to Round Robin.
>> +
>> +	config IPV6_MULTIPATH_RANDOM
>> +	bool "IPv6: MULTIPATH random algorithm"
>> +	---help---
>> +	  Multipath routes are chosen in a random fashion.
>> +endchoice
> 
> We should use hash-based algorithm by default,
> according to RFC4311.  See also RFC6438.

Sorry, I missed something and misunderstood.


I prefer "HASH" or "FLOW" instead of "ROUTE"
because it selects the route by "hash" or "flow"
(as other options mean; by "round-robin"(RR) or by "random"(RANDOM)).

And please clearly specify that it is the
default and recommended algorithm.
(We may have references to RFCs.)

Default is "y" but description says "if unsure, say N."
This is not good.


Of course, we may want to take "flow label" into account
when calculating hash (RFC6438).

Regards,

-----
[*] IPv6: equal cost multipath for IPv6 routing

    Enable this option to support ECMP for IPv6.

 [*] IPv6: MULTIPATH hash-based algorithm

      Multipath routes are chosen according to hash of packet
      header information (source, destination, ...)
      to ensure a flow keeps the same route.

      This is the default and recommended.

 [ ] IPv6: MULTIPATH round-robin algorithm
 [ ] IPv6: MULTIPATH random algorithm

--yoshfuji

^ permalink raw reply

* Re: [RFC PATCH net-next 1/1] ipv6: add support of ECMP
From: Nicolas Dichtel @ 2012-09-12  9:53 UTC (permalink / raw)
  To: YOSHIFUJI Hideaki; +Cc: bernat, netdev, davem
In-Reply-To: <505058F5.9020707@linux-ipv6.org>

Le 12/09/2012 11:42, YOSHIFUJI Hideaki a écrit :
> Hello.
> 
> YOSHIFUJI Hideaki wrote:
>> Hello.
>>
>> Nicolas Dichtel wrote:
>>> This patch adds the support of equal cost multipath for IPv6.
>>>
>>> The patch is based on a previous work from
>>> Luc Saillard <luc.saillard@6wind.com>.
>>>
>>> Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
>> :
>>> +config IPV6_MULTIPATH
>>> +	bool "IPv6: equal cost multipath for IPv6 routing"
>>> +	depends on IPV6
>>> +	default y
>>> +	---help---
>>> +	  Enable this option to support ECMP for IPv6.
>>> +	  If unsure, say N.
>>> +
>>> +choice
>>> +	prompt "IPv6: choose Multipath algorithm"
>>> +	depends on IPV6_MULTIPATH
>>> +	default IPV6_MULTIPATH_ROUTE
>>> +	---help---
>>> +	  Define the method to select route between each possible path.
>>> +
>>> +	config IPV6_MULTIPATH_ROUTE
>>> +	bool "IPv6: MULTIPATH flow algorithm"
>>> +	---help---
>>> +	  Multipath routes are chosen according to hash of packet header to
>>> +	  ensure a flow keeps the same route.
>>> +
>>> +	config IPV6_MULTIPATH_RR
>>> +	bool "IPv6: MULTIPATH round robin algorithm"
>>> +	---help---
>>> +	  Multipath routes are chosen according to Round Robin.
>>> +
>>> +	config IPV6_MULTIPATH_RANDOM
>>> +	bool "IPv6: MULTIPATH random algorithm"
>>> +	---help---
>>> +	  Multipath routes are chosen in a random fashion.
>>> +endchoice
>>
>> We should use hash-based algorithm by default,
>> according to RFC4311.  See also RFC6438.
> 
> Sorry, I missed something and misunderstood.
> 
> 
> I prefer "HASH" of "FLOW" instead of "ROUTE"
> because it select route by "hash" or "flow"
> (as other options mean; by "round-robin"(RR) or by "random"(RANDOM)).
Ok.

> 
> And, please clearly specify that it is the recommended
> the default and recommended algorithm.
> (We may have references to RFCs.)
Ok.

> 
> Default is "y" but description says "if unsure, say N."
> This is not good.
Yes, good catch.

> 
> 
> Of course, we may want to take "flow label" into account
> when calculating hash (RFC6438).
Ok, I will add it. I will wait for other comments.


Regards,
Nicolas

^ permalink raw reply

* Re: [v2 PATCH 2/2] netprio_cgroup: Use memcpy instead of the for-loop to copy priomap
From: Srivatsa S. Bhat @ 2012-09-12 10:23 UTC (permalink / raw)
  To: David Miller
  Cc: nhorman, David.Laight, john.r.fastabend, gaofeng, eric.dumazet,
	mark.d.rustad, lizefan, netdev, linux-kernel
In-Reply-To: <505046A5.1050009@linux.vnet.ibm.com>

On 09/12/2012 01:54 PM, Srivatsa S. Bhat wrote:
> On 09/12/2012 01:19 PM, David Miller wrote:
>> From: "Srivatsa S. Bhat" <srivatsa.bhat@linux.vnet.ibm.com>
>> Date: Wed, 12 Sep 2012 11:37:47 +0530
>>
>>> +		memcpy(new_priomap->priomap, old_priomap->priomap,
>>> +			old_priomap->priomap_len *
>>> +					sizeof(old_priomap->priomap[0]));
>>
>> This argument indentation is ridiculous.  Try:
>>
>> 		memcpy(new_priomap->priomap, old_priomap->priomap,
>> 		       old_priomap->priomap_len *
>> 		       sizeof(old_priomap->priomap[0]));
>>
>> Using TABs exclusively for argumentat indentation is not the goal.
>>
>> Rather, lining the arguments up properly so that they sit at the first
>> column after the first line's openning parenthesis is what you should
>> be trying to achieve.
> 
> OK, will fix it, thanks!
> 
>>
>> And ignoring whatever stylistic convention we may or may not have, I
>> find it impossibly hard to believe that the code quoted above looks
>> good even to you.
>>
> 
> On second thoughts, I think the memcpy in this case will actually be worse
> since it will copy the contents in chunks of smaller size than the for-loop.

Oops, I missed the __HAVE_ARCH_MEMCPY and was looking at the wrong memcpy
implementation.. And in any case, I went totally off-track by your last comment.
I hadn't realized that you were still referring to the way the code looks, rather
than questioning the switch to memcpy. Sorry about that!

I'll fix the odd-looking indentation and repost the patch.
 
Regards,
Srivatsa S. Bhat

^ permalink raw reply

* Re: [PATCH net-next V3 1/2] IB/ipoib: Add rtnl_link_ops support
From: Or Gerlitz @ 2012-09-12 10:40 UTC (permalink / raw)
  To: Patrick McHardy, Eric Dumazet; +Cc: netdev, Shlomo Pongratz
In-Reply-To: <Pine.GSO.4.63.1208291450330.1098@stinky-local.trash.net>

Patrick McHardy wrote:
> Or Gerlitz wrote:
>
>> +#define IFLA_IPOIB_MAX (__IFLA_IPOIB_MAX - 1)
>
> This should go into include/linux/if_link.h
>

This comment was easy to fix... HOWEVER, using V3 -- this posting
http://marc.info/?l=linux-netdev&m=134572609226343&w=2 -- of the patch
over latest net-next (commit 280050cc81ccb2e06e4061228ee34c0cc86b1560
"x86 bpf_jit: support MOD operation"), when I just run the following trivial
sequence which loads the module and creates/deletes legacy child

$ modprobe ib_ipoib debug_level=1
$ echo 0x8001 > /sys/class/net/ib0/create_child
$ echo 0x8001 > /sys/class/net/ib0/delete_child
$ modprobe -r ib_ipoib

I get the lockdep warning below, pointing to ipoib_vlan_delete, which is
by no means called by the module unload sequence. Confusing... any idea?

Or.

======================================================
[ INFO: possible circular locking dependency detected ]
3.6.0-rc3+ #144 Not tainted
-------------------------------------------------------
modprobe/4443 is trying to acquire lock:
  (s_active#155){++++.+}, at: [<ffffffff8114f93f>] 
sysfs_addrm_finish+0x29/0x52

but task is already holding lock:
  (rtnl_mutex){+.+.+.}, at: [<ffffffff812fc103>] rtnl_lock+0x12/0x14

which lock already depends on the new lock.


the existing dependency chain (in reverse order) is:

-> #1 (rtnl_mutex){+.+.+.}:
        [<ffffffff81072b30>] lock_acquire+0x14f/0x19b
        [<ffffffff81396a43>] mutex_lock_nested+0x64/0x2ce
        [<ffffffff812fc103>] rtnl_lock+0x12/0x14
        [<ffffffff812eecf1>] netdev_run_todo+0xa5/0x27e
        [<ffffffff812fc0dd>] rtnl_unlock+0x9/0xb
        [<ffffffffa0394889>] ipoib_vlan_delete+0x111/0x148 [ib_ipoib]
        [<ffffffffa038d29b>] delete_child+0x44/0x60 [ib_ipoib]
        [<ffffffff81247bd8>] dev_attr_store+0x1b/0x1d
        [<ffffffff8114e223>] sysfs_write_file+0x103/0x13f
        [<ffffffff810f206b>] vfs_write+0xae/0x133
        [<ffffffff810f21a9>] sys_write+0x45/0x6c
        [<ffffffff813a05e2>] system_call_fastpath+0x16/0x1b

-> #0 (s_active#155){++++.+}:
        [<ffffffff81072364>] __lock_acquire+0x10d1/0x174e
        [<ffffffff81072b30>] lock_acquire+0x14f/0x19b
        [<ffffffff8114ef79>] sysfs_deactivate+0x93/0xca
        [<ffffffff8114f93f>] sysfs_addrm_finish+0x29/0x52
        [<ffffffff8114fa36>] sysfs_remove_dir+0x8b/0x9e
        [<ffffffff81199c6d>] kobject_del+0x16/0x37
        [<ffffffff812491ca>] device_del+0x18f/0x19f
        [<ffffffff813000d3>] netdev_unregister_kobject+0x52/0x57
        [<ffffffff812eeacc>] rollback_registered_many+0x238/0x27c
        [<ffffffff812eebe9>] unregister_netdevice_queue+0x7f/0xbf
        [<ffffffff812eec45>] unregister_netdev+0x1c/0x23
        [<ffffffffa038d1eb>] ipoib_remove_one+0xad/0xe7 [ib_ipoib]
        [<ffffffffa01a89ec>] ib_unregister_client+0x3d/0x11c [ib_core]
        [<ffffffffa0398fcf>] ipoib_cleanup_module+0x2f/0x4e [ib_ipoib]
        [<ffffffff8107d81d>] sys_delete_module+0x1ac/0x210
        [<ffffffff813a05e2>] system_call_fastpath+0x16/0x1b

other info that might help us debug this:

  Possible unsafe locking scenario:

        CPU0                    CPU1
        ----                    ----
   lock(rtnl_mutex);
                                lock(s_active#155);
                                lock(rtnl_mutex);
   lock(s_active#155);

  *** DEADLOCK ***

2 locks held by modprobe/4443:
  #0:  (device_mutex){+.+.+.}, at: [<ffffffffa01a89d1>] 
ib_unregister_client+0x22/0x11c [ib_core]
  #1:  (rtnl_mutex){+.+.+.}, at: [<ffffffff812fc103>] rtnl_lock+0x12/0x14

stack backtrace:
Pid: 4443, comm: modprobe Not tainted 3.6.0-rc3+ #144
Call Trace:
  [<ffffffff8102dad8>] ? console_unlock+0x329/0x37e
  [<ffffffff81070c15>] print_circular_bug+0x28e/0x29f
  [<ffffffff81072364>] __lock_acquire+0x10d1/0x174e
  [<ffffffff8114f93f>] ? sysfs_addrm_finish+0x29/0x52
  [<ffffffff81072b30>] lock_acquire+0x14f/0x19b
  [<ffffffff8114f93f>] ? sysfs_addrm_finish+0x29/0x52
  [<ffffffff8114ef79>] sysfs_deactivate+0x93/0xca
  [<ffffffff8114f93f>] ? sysfs_addrm_finish+0x29/0x52
  [<ffffffff8114f93f>] sysfs_addrm_finish+0x29/0x52
  [<ffffffff8114fa36>] sysfs_remove_dir+0x8b/0x9e
  [<ffffffff81199c6d>] kobject_del+0x16/0x37
  [<ffffffff812491ca>] device_del+0x18f/0x19f
  [<ffffffff813000d3>] netdev_unregister_kobject+0x52/0x57
  [<ffffffff812eeacc>] rollback_registered_many+0x238/0x27c
  [<ffffffff812eebe9>] unregister_netdevice_queue+0x7f/0xbf
  [<ffffffff812eec45>] unregister_netdev+0x1c/0x23
  [<ffffffffa038d1eb>] ipoib_remove_one+0xad/0xe7 [ib_ipoib]
  [<ffffffffa01a89ec>] ib_unregister_client+0x3d/0x11c [ib_core]
  [<ffffffffa0398fcf>] ipoib_cleanup_module+0x2f/0x4e [ib_ipoib]
  [<ffffffff8107d81d>] sys_delete_module+0x1ac/0x210
  [<ffffffff8106fc4f>] ? trace_hardirqs_on_caller+0x11e/0x155
  [<ffffffff811a2a0e>] ? trace_hardirqs_on_thunk+0x3a/0x3f
  [<ffffffff813a05e2>] system_call_fastpath+0x16/0x1b

^ permalink raw reply

* [PATCH V2] Generalise "auto-negotiation done" function, move generic PHY code to phy_device.c
From: Alexander Sverdlin @ 2012-09-12 11:11 UTC (permalink / raw)
  To: netdev, Andy Fleming, davem

From: Alexander Sverdlin <alexander.sverdlin@sysgo.com>

Generalise "auto-negotiation done" function, move generic PHY code to phy_device.c 

Not all devices have "auto-negotiation done" bit at the place, as expected by
phy_aneg_done() in phy.c. Example of such device is Marvell 88E61xx Ethernet 
switch which could be controlled by Linux PHY layer, if struct phy_driver had
abstraction for above function. So move hardware-dependent implementation details
for "generic" PHY to phy_device.c, and modify all PHY drivers to use new field.
Now phy.c contains only high-level state-machine functionality, leaving 
hardware-layer to different drivers.
V2 is patch against net-next, as one additional PHY driver was added.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@sysgo.com>
---
 Documentation/networking/phy.txt |    9 +++++----
 drivers/net/phy/amd.c            |    1 +
 drivers/net/phy/bcm63xx.c        |    2 ++
 drivers/net/phy/bcm87xx.c        |    2 ++
 drivers/net/phy/broadcom.c       |   11 +++++++++++
 drivers/net/phy/cicada.c         |    2 ++
 drivers/net/phy/davicom.c        |    3 +++
 drivers/net/phy/dp83640.c        |    1 +
 drivers/net/phy/et1011c.c        |    1 +
 drivers/net/phy/icplus.c         |    3 +++
 drivers/net/phy/lxt.c            |    3 +++
 drivers/net/phy/marvell.c        |    9 +++++++++
 drivers/net/phy/micrel.c         |    5 +++++
 drivers/net/phy/national.c       |    1 +
 drivers/net/phy/phy.c            |   22 ++--------------------
 drivers/net/phy/phy_device.c     |   18 ++++++++++++++++++
 drivers/net/phy/qsemi.c          |    1 +
 drivers/net/phy/realtek.c        |    1 +
 drivers/net/phy/smsc.c           |    5 +++++
 drivers/net/phy/ste10Xp.c        |    2 ++
 drivers/net/phy/vitesse.c        |    2 ++
 include/linux/phy.h              |   12 ++++++++++--
 22 files changed, 90 insertions(+), 26 deletions(-)

diff --git a/Documentation/networking/phy.txt b/Documentation/networking/phy.txt
index 95e5f59..f75fd24 100644
--- a/Documentation/networking/phy.txt
+++ b/Documentation/networking/phy.txt
@@ -1,7 +1,7 @@
 
 -------
 PHY Abstraction Layer
-(Updated 2008-04-08)
+(Updated 2012-09-06)
 
 Purpose
 
@@ -257,15 +257,16 @@ Writing a PHY driver
    probe: Does any setup needed by the driver
    suspend/resume: power management
    config_aneg: Changes the speed/duplex/negotiation settings
+   aneg_done: Reads current auto-negotiation state
    read_status: Reads the current speed/duplex/negotiation settings
    ack_interrupt: Clear a pending interrupt
    config_intr: Enable or disable interrupts
    remove: Does any driver take-down
 
- Of these, only config_aneg and read_status are required to be
+ Of these, only config_aneg, aneg_done and read_status are required to be
  assigned by the driver code.  The rest are optional.  Also, it is
- preferred to use the generic phy driver's versions of these two
- functions if at all possible: genphy_read_status and
+ preferred to use the generic phy driver's versions of these three
+ functions if at all possible: genphy_read_status, genphy_aneg_done and
  genphy_config_aneg.  If this is not possible, it is likely that
  you only need to perform some actions before and after invoking
  these functions, and so your functions will wrap the generic
diff --git a/drivers/net/phy/amd.c b/drivers/net/phy/amd.c
index a3fb5ce..286596c 100644
--- a/drivers/net/phy/amd.c
+++ b/drivers/net/phy/amd.c
@@ -69,6 +69,7 @@ static struct phy_driver am79c_driver = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= am79c_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= am79c_ack_interrupt,
 	.config_intr	= am79c_config_intr,
diff --git a/drivers/net/phy/bcm63xx.c b/drivers/net/phy/bcm63xx.c
index 84c7a39..5953d1a 100644
--- a/drivers/net/phy/bcm63xx.c
+++ b/drivers/net/phy/bcm63xx.c
@@ -81,6 +81,7 @@ static struct phy_driver bcm63xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm63xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm63xx_ack_interrupt,
 	.config_intr	= bcm63xx_config_intr,
@@ -94,6 +95,7 @@ static struct phy_driver bcm63xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm63xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm63xx_ack_interrupt,
 	.config_intr	= bcm63xx_config_intr,
diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index 2346b38..9fefbad 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -195,6 +195,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= bcm87xx_read_status,
 	.ack_interrupt	= bcm87xx_ack_interrupt,
 	.config_intr	= bcm87xx_config_intr,
@@ -208,6 +209,7 @@ static struct phy_driver bcm87xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= bcm87xx_config_init,
 	.config_aneg	= bcm87xx_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= bcm87xx_read_status,
 	.ack_interrupt	= bcm87xx_ack_interrupt,
 	.config_intr	= bcm87xx_config_intr,
diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index f8c90ea..aaeb9ee 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -692,6 +692,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -705,6 +706,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -718,6 +720,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -731,6 +734,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -744,6 +748,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= bcm5481_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -757,6 +762,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm5482_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= bcm5482_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -770,6 +776,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -783,6 +790,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -796,6 +804,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= bcm54xx_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= bcm54xx_ack_interrupt,
 	.config_intr	= bcm54xx_config_intr,
@@ -809,6 +818,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= brcm_fet_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= brcm_fet_ack_interrupt,
 	.config_intr	= brcm_fet_config_intr,
@@ -822,6 +832,7 @@ static struct phy_driver broadcom_drivers[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= brcm_fet_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= brcm_fet_ack_interrupt,
 	.config_intr	= brcm_fet_config_intr,
diff --git a/drivers/net/phy/cicada.c b/drivers/net/phy/cicada.c
index db472ff..7613ec8 100644
--- a/drivers/net/phy/cicada.c
+++ b/drivers/net/phy/cicada.c
@@ -111,6 +111,7 @@ static struct phy_driver cis820x_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= &cis820x_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &cis820x_ack_interrupt,
 	.config_intr	= &cis820x_config_intr,
@@ -123,6 +124,7 @@ static struct phy_driver cis820x_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= &cis820x_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &cis820x_ack_interrupt,
 	.config_intr	= &cis820x_config_intr,
diff --git a/drivers/net/phy/davicom.c b/drivers/net/phy/davicom.c
index 81c7bc0..90b337d 100644
--- a/drivers/net/phy/davicom.c
+++ b/drivers/net/phy/davicom.c
@@ -152,6 +152,7 @@ static struct phy_driver dm91xx_driver[] = {
 	.features	= PHY_BASIC_FEATURES,
 	.config_init	= dm9161_config_init,
 	.config_aneg	= dm9161_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.driver		= { .owner = THIS_MODULE,},
 }, {
@@ -161,6 +162,7 @@ static struct phy_driver dm91xx_driver[] = {
 	.features	= PHY_BASIC_FEATURES,
 	.config_init	= dm9161_config_init,
 	.config_aneg	= dm9161_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.driver		= { .owner = THIS_MODULE,},
 }, {
@@ -170,6 +172,7 @@ static struct phy_driver dm91xx_driver[] = {
 	.features	= PHY_BASIC_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= dm9161_ack_interrupt,
 	.config_intr	= dm9161_config_intr,
diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c
index b0da022..8c29ceb 100644
--- a/drivers/net/phy/dp83640.c
+++ b/drivers/net/phy/dp83640.c
@@ -1257,6 +1257,7 @@ static struct phy_driver dp83640_driver = {
 	.probe		= dp83640_probe,
 	.remove		= dp83640_remove,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ts_info	= dp83640_ts_info,
 	.hwtstamp	= dp83640_hwtstamp,
diff --git a/drivers/net/phy/et1011c.c b/drivers/net/phy/et1011c.c
index a8eb19e..2878597 100644
--- a/drivers/net/phy/et1011c.c
+++ b/drivers/net/phy/et1011c.c
@@ -94,6 +94,7 @@ static struct phy_driver et1011c_driver = {
 	.features	= (PHY_BASIC_FEATURES | SUPPORTED_1000baseT_Full),
 	.flags		= PHY_POLL,
 	.config_aneg	= et1011c_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= et1011c_read_status,
 	.driver 	= { .owner = THIS_MODULE,},
 };
diff --git a/drivers/net/phy/icplus.c b/drivers/net/phy/icplus.c
index d5199cb..7d35770 100644
--- a/drivers/net/phy/icplus.c
+++ b/drivers/net/phy/icplus.c
@@ -210,6 +210,7 @@ static struct phy_driver icplus_driver[] = {
 	.features	= PHY_BASIC_FEATURES,
 	.config_init	= &ip175c_config_init,
 	.config_aneg	= &ip175c_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &ip175c_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
@@ -222,6 +223,7 @@ static struct phy_driver icplus_driver[] = {
 			  SUPPORTED_Asym_Pause,
 	.config_init	= &ip1001_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
@@ -236,6 +238,7 @@ static struct phy_driver icplus_driver[] = {
 	.ack_interrupt	= ip101a_g_ack_interrupt,
 	.config_init	= &ip101a_g_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
diff --git a/drivers/net/phy/lxt.c b/drivers/net/phy/lxt.c
index 6d1e3fc..846d408 100644
--- a/drivers/net/phy/lxt.c
+++ b/drivers/net/phy/lxt.c
@@ -158,6 +158,7 @@ static struct phy_driver lxt97x_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= lxt970_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= lxt970_ack_interrupt,
 	.config_intr	= lxt970_config_intr,
@@ -169,6 +170,7 @@ static struct phy_driver lxt97x_driver[] = {
 	.features	= PHY_BASIC_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= lxt971_ack_interrupt,
 	.config_intr	= lxt971_config_intr,
@@ -181,6 +183,7 @@ static struct phy_driver lxt97x_driver[] = {
 	.flags		= 0,
 	.probe		= lxt973_probe,
 	.config_aneg	= lxt973_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.driver		= { .owner = THIS_MODULE,},
 } };
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 5d2a3f2..f485290 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -713,6 +713,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.config_aneg = &marvell_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -726,6 +727,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1111_config_init,
 		.config_aneg = &marvell_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -739,6 +741,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1111_config_init,
 		.config_aneg = &marvell_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -752,6 +755,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1118_config_init,
 		.config_aneg = &m88e1118_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -764,6 +768,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.config_aneg = &m88e1121_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -777,6 +782,7 @@ static struct phy_driver marvell_drivers[] = {
 		.features = PHY_GBIT_FEATURES,
 		.flags = PHY_HAS_INTERRUPT,
 		.config_aneg = &m88e1318_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &marvell_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -791,6 +797,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1145_config_init,
 		.config_aneg = &marvell_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -804,6 +811,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1149_config_init,
 		.config_aneg = &m88e1118_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
@@ -817,6 +825,7 @@ static struct phy_driver marvell_drivers[] = {
 		.flags = PHY_HAS_INTERRUPT,
 		.config_init = &m88e1111_config_init,
 		.config_aneg = &marvell_config_aneg,
+		.aneg_done = genphy_aneg_done,
 		.read_status = &genphy_read_status,
 		.ack_interrupt = &marvell_ack_interrupt,
 		.config_intr = &marvell_config_intr,
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index cf287e0..12d4f13 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -123,6 +123,7 @@ static struct phy_driver ksphy_driver[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= ks8737_config_intr,
@@ -136,6 +137,7 @@ static struct phy_driver ksphy_driver[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
@@ -149,6 +151,7 @@ static struct phy_driver ksphy_driver[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= ks8051_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
@@ -161,6 +164,7 @@ static struct phy_driver ksphy_driver[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= kszphy_config_intr,
@@ -174,6 +178,7 @@ static struct phy_driver ksphy_driver[] = {
 	.flags		= PHY_HAS_MAGICANEG | PHY_HAS_INTERRUPT,
 	.config_init	= kszphy_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= kszphy_ack_interrupt,
 	.config_intr	= ksz9021_config_intr,
diff --git a/drivers/net/phy/national.c b/drivers/net/phy/national.c
index 9a5f234..7b47a2b 100644
--- a/drivers/net/phy/national.c
+++ b/drivers/net/phy/national.c
@@ -137,6 +137,7 @@ static struct phy_driver dp83865_driver = {
 	.flags = PHY_HAS_INTERRUPT,
 	.config_init = ns_config_init,
 	.config_aneg = genphy_config_aneg,
+	.aneg_done = genphy_aneg_done,
 	.read_status = genphy_read_status,
 	.ack_interrupt = ns_ack_interrupt,
 	.config_intr = ns_config_intr,
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index ef9ea92..dfca51d 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -95,24 +95,6 @@ static int phy_config_interrupt(struct phy_device *phydev, u32 interrupts)
 	return err;
 }
 
-
-/**
- * phy_aneg_done - return auto-negotiation status
- * @phydev: target phy_device struct
- *
- * Description: Reads the status register and returns 0 either if
- *   auto-negotiation is incomplete, or if there was an error.
- *   Returns BMSR_ANEGCOMPLETE if auto-negotiation is done.
- */
-static inline int phy_aneg_done(struct phy_device *phydev)
-{
-	int retval;
-
-	retval = phy_read(phydev, MII_BMSR);
-
-	return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
-}
-
 /* A structure for mapping a particular speed and duplex
  * combination to a particular SUPPORTED and ADVERTISED value */
 struct phy_setting {
@@ -807,7 +789,7 @@ void phy_state_machine(struct work_struct *work)
 
 			/* Check if negotiation is done.  Break
 			 * if there's an error */
-			err = phy_aneg_done(phydev);
+			err = phydev->drv->aneg_done(phydev);
 			if (err < 0)
 				break;
 
@@ -921,7 +903,7 @@ void phy_state_machine(struct work_struct *work)
 				break;
 
 			if (AUTONEG_ENABLE == phydev->autoneg) {
-				err = phy_aneg_done(phydev);
+				err = phydev->drv->aneg_done(phydev);
 				if (err < 0)
 					break;
 
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 8af46e8..8e09103 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -785,6 +785,23 @@ int genphy_config_aneg(struct phy_device *phydev)
 EXPORT_SYMBOL(genphy_config_aneg);
 
 /**
+ * genphy_aneg_done - return auto-negotiation status
+ * @phydev: target phy_device struct
+ *
+ * Description: Reads the status register and returns positive value if
+ * auto-negotiation is complete, 0 if incomplete and negative on failure.
+ */
+int genphy_aneg_done(struct phy_device *phydev)
+{
+	int retval;
+
+	retval = phy_read(phydev, MII_BMSR);
+
+	return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
+}
+EXPORT_SYMBOL(genphy_aneg_done);
+
+/**
  * genphy_update_link - update link status in @phydev
  * @phydev: target phy_device struct
  *
@@ -1117,6 +1134,7 @@ static struct phy_driver genphy_driver = {
 	.config_init	= genphy_config_init,
 	.features	= 0,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
diff --git a/drivers/net/phy/qsemi.c b/drivers/net/phy/qsemi.c
index fe0d0a1..5208b16 100644
--- a/drivers/net/phy/qsemi.c
+++ b/drivers/net/phy/qsemi.c
@@ -119,6 +119,7 @@ static struct phy_driver qs6612_driver = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= qs6612_config_init,
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.ack_interrupt	= qs6612_ack_interrupt,
 	.config_intr	= qs6612_config_intr,
diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 72f9347..2a54bc9 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -57,6 +57,7 @@ static struct phy_driver rtl821x_driver = {
 	.features	= PHY_GBIT_FEATURES,
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &rtl821x_ack_interrupt,
 	.config_intr	= &rtl821x_config_intr,
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 6d61923..dd494aa 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -73,6 +73,7 @@ static struct phy_driver smsc_phy_driver[] = {
 
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.config_init	= smsc_phy_config_init,
 
@@ -95,6 +96,7 @@ static struct phy_driver smsc_phy_driver[] = {
 
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.config_init	= smsc_phy_config_init,
 
@@ -117,6 +119,7 @@ static struct phy_driver smsc_phy_driver[] = {
 
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.config_init	= smsc_phy_config_init,
 
@@ -139,6 +142,7 @@ static struct phy_driver smsc_phy_driver[] = {
 
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.config_init	= lan911x_config_init,
 
@@ -161,6 +165,7 @@ static struct phy_driver smsc_phy_driver[] = {
 
 	/* basic functions */
 	.config_aneg	= genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= genphy_read_status,
 	.config_init	= smsc_phy_config_init,
 
diff --git a/drivers/net/phy/ste10Xp.c b/drivers/net/phy/ste10Xp.c
index 5e1eb13..8d9baae 100644
--- a/drivers/net/phy/ste10Xp.c
+++ b/drivers/net/phy/ste10Xp.c
@@ -90,6 +90,7 @@ static struct phy_driver ste10xp_pdriver[] = {
 	.flags = PHY_HAS_INTERRUPT,
 	.config_init = ste10Xp_config_init,
 	.config_aneg = genphy_config_aneg,
+	.aneg_done = genphy_aneg_done,
 	.read_status = genphy_read_status,
 	.ack_interrupt = ste10Xp_ack_interrupt,
 	.config_intr = ste10Xp_config_intr,
@@ -104,6 +105,7 @@ static struct phy_driver ste10xp_pdriver[] = {
 	.flags = PHY_HAS_INTERRUPT,
 	.config_init = ste10Xp_config_init,
 	.config_aneg = genphy_config_aneg,
+	.aneg_done = genphy_aneg_done,
 	.read_status = genphy_read_status,
 	.ack_interrupt = ste10Xp_ack_interrupt,
 	.config_intr = ste10Xp_config_intr,
diff --git a/drivers/net/phy/vitesse.c b/drivers/net/phy/vitesse.c
index 2585c38..17c97b4 100644
--- a/drivers/net/phy/vitesse.c
+++ b/drivers/net/phy/vitesse.c
@@ -160,6 +160,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= &vsc824x_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
@@ -173,6 +174,7 @@ static struct phy_driver vsc82xx_driver[] = {
 	.flags		= PHY_HAS_INTERRUPT,
 	.config_init	= &vsc8221_config_init,
 	.config_aneg	= &genphy_config_aneg,
+	.aneg_done	= genphy_aneg_done,
 	.read_status	= &genphy_read_status,
 	.ack_interrupt	= &vsc824x_ack_interrupt,
 	.config_intr	= &vsc82xx_config_intr,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 93b3cf7..876fbfa 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -369,8 +369,8 @@ struct phy_device {
  * flags: A bitfield defining certain other features this PHY
  *   supports (like interrupts)
  *
- * The drivers must implement config_aneg and read_status.  All
- * other functions are optional. Note that none of these
+ * The drivers must implement config_aneg, aneg_done and read_status.
+ * All other functions are optional. Note that none of these
  * functions should be called from interrupt time.  The goal is
  * for the bus read/write functions to be able to block when the
  * bus transaction is happening, and be freed up by an interrupt
@@ -408,6 +408,13 @@ struct phy_driver {
 	 */
 	int (*config_aneg)(struct phy_device *phydev);
 
+	/*
+	 * Reads auto-negotiation status. Returns positive value if
+	 * auto-negotiation is complete, 0 if incomplete and negative
+	 * value on error
+	 */
+	int (*aneg_done)(struct phy_device *phydev);
+
 	/* Determines the negotiated speed and duplex */
 	int (*read_status)(struct phy_device *phydev);
 
@@ -528,6 +535,7 @@ static inline int phy_read_status(struct phy_device *phydev) {
 
 int genphy_restart_aneg(struct phy_device *phydev);
 int genphy_config_aneg(struct phy_device *phydev);
+int genphy_aneg_done(struct phy_device *phydev);
 int genphy_update_link(struct phy_device *phydev);
 int genphy_read_status(struct phy_device *phydev);
 int genphy_suspend(struct phy_device *phydev);

^ permalink raw reply related

* [PATCH] Replace genphy_update_link() call with phy_read_status()
From: Alexander Sverdlin @ 2012-09-12 11:23 UTC (permalink / raw)
  To: netdev, Andy Fleming, davem

From: Alexander Sverdlin <alexander.sverdlin@sysgo.com>

Replace genphy_update_link() call with phy_read_status() 

Code in phy.c should not call genphy_*() functions directly, as this breaks the PHY layer abstraction.
Some drivers may re-implement the "read_status" callback, but it is not called in one place in the
PHY state machine, where genphy_update_link() is called instead. So fix it.
For drivers that rely on the genphy_* implementation nothing changes, as genphy_read_status() calls
genphy_update_link() anyway.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@sysgo.com>
---
 drivers/net/phy/phy.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index dfca51d..cfed41c 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -839,7 +839,7 @@ void phy_state_machine(struct work_struct *work)
 			}
 			break;
 		case PHY_FORCING:
-			err = genphy_update_link(phydev);
+			err = phy_read_status(phydev);
 
 			if (err)
 				break;

^ permalink raw reply related

* [V4 PATCH 1/8] cxgb4/cxgb4vf: Chelsio FCoE offload driver submission (common header updates).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains updates to firmware/hardware header files shared
between csiostor and cxgb4/cxgb4vf, and the resulting changes to the
cxgb4/cxgb4vf source files.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |    2 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |   10 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |   16 ++--
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h    |   69 ++++++++++++++-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |  104 +++++++++++++++++++---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      |   11 ++-
 7 files changed, 176 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5ed49af..b56d96c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3102,7 +3102,7 @@ static int adap_init1(struct adapter *adap, struct fw_caps_config_cmd *c)
 	memset(c, 0, sizeof(*c));
 	c->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
 			       FW_CMD_REQUEST | FW_CMD_READ);
-	c->retval_len16 = htonl(FW_LEN16(*c));
+	c->cfvalid_to_len16 = htonl(FW_LEN16(*c));
 	ret = t4_wr_mbox(adap, adap->fn, c, sizeof(*c), c);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index d49933e..121b1e9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -455,7 +455,7 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 {
 	if (q->pend_cred >= 8) {
 		wmb();
-		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO |
+		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL), DBPRIO(1) |
 			     QID(q->cntxt_id) | PIDX(q->pend_cred / 8));
 		q->pend_cred &= 7;
 	}
@@ -2020,10 +2020,10 @@ int t4_sge_alloc_rxq(struct adapter *adap, struct sge_rspq *iq, bool fwevtq,
 			goto fl_nomem;
 
 		flsz = fl->size / 8 + STAT_LEN / sizeof(struct tx_desc);
-		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN |
+		c.iqns_to_fl0congen = htonl(FW_IQ_CMD_FL0PACKEN(1) |
 					    FW_IQ_CMD_FL0FETCHRO(1) |
 					    FW_IQ_CMD_FL0DATARO(1) |
-					    FW_IQ_CMD_FL0PADEN);
+					    FW_IQ_CMD_FL0PADEN(1));
 		c.fl0dcaen_to_fl0cidxfthresh = htons(FW_IQ_CMD_FL0FBMIN(2) |
 				FW_IQ_CMD_FL0FBMAX(3));
 		c.fl0size = htons(flsz);
@@ -2416,10 +2416,10 @@ void t4_sge_init(struct adapter *adap)
 	unsigned int fl_align_log = ilog2(FL_ALIGN);
 
 	t4_set_reg_field(adap, SGE_CONTROL, PKTSHIFT_MASK |
-			 INGPADBOUNDARY_MASK | EGRSTATUSPAGESIZE,
+			 INGPADBOUNDARY_MASK | EGRSTATUSPAGESIZE(1),
 			 INGPADBOUNDARY(fl_align_log - 5) | PKTSHIFT(2) |
 			 RXPKTCPLMODE |
-			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE : 0));
+			 (STAT_LEN == 128 ? EGRSTATUSPAGESIZE(1) : 0));
 
 	/*
 	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index fa947df..a943faa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -456,12 +456,12 @@ static int sf1_read(struct adapter *adapter, unsigned int byte_cnt, int cont,
 
 	if (!byte_cnt || byte_cnt > 4)
 		return -EINVAL;
-	if (t4_read_reg(adapter, SF_OP) & BUSY)
+	if (t4_read_reg(adapter, SF_OP) & SF_BUSY)
 		return -EBUSY;
 	cont = cont ? SF_CONT : 0;
 	lock = lock ? SF_LOCK : 0;
 	t4_write_reg(adapter, SF_OP, lock | cont | BYTECNT(byte_cnt - 1));
-	ret = t4_wait_op_done(adapter, SF_OP, BUSY, 0, SF_ATTEMPTS, 5);
+	ret = t4_wait_op_done(adapter, SF_OP, SF_BUSY, 0, SF_ATTEMPTS, 5);
 	if (!ret)
 		*valp = t4_read_reg(adapter, SF_DATA);
 	return ret;
@@ -484,14 +484,14 @@ static int sf1_write(struct adapter *adapter, unsigned int byte_cnt, int cont,
 {
 	if (!byte_cnt || byte_cnt > 4)
 		return -EINVAL;
-	if (t4_read_reg(adapter, SF_OP) & BUSY)
+	if (t4_read_reg(adapter, SF_OP) & SF_BUSY)
 		return -EBUSY;
 	cont = cont ? SF_CONT : 0;
 	lock = lock ? SF_LOCK : 0;
 	t4_write_reg(adapter, SF_DATA, val);
 	t4_write_reg(adapter, SF_OP, lock |
 		     cont | BYTECNT(byte_cnt - 1) | OP_WR);
-	return t4_wait_op_done(adapter, SF_OP, BUSY, 0, SF_ATTEMPTS, 5);
+	return t4_wait_op_done(adapter, SF_OP, SF_BUSY, 0, SF_ATTEMPTS, 5);
 }
 
 /**
@@ -1972,14 +1972,14 @@ int t4_wol_pat_enable(struct adapter *adap, unsigned int port, unsigned int map,
 		t4_write_reg(adap, EPIO_REG(DATA0), mask0);
 		t4_write_reg(adap, EPIO_REG(OP), ADDRESS(i) | EPIOWR);
 		t4_read_reg(adap, EPIO_REG(OP));                /* flush */
-		if (t4_read_reg(adap, EPIO_REG(OP)) & BUSY)
+		if (t4_read_reg(adap, EPIO_REG(OP)) & SF_BUSY)
 			return -ETIMEDOUT;
 
 		/* write CRC */
 		t4_write_reg(adap, EPIO_REG(DATA0), crc);
 		t4_write_reg(adap, EPIO_REG(OP), ADDRESS(i + 32) | EPIOWR);
 		t4_read_reg(adap, EPIO_REG(OP));                /* flush */
-		if (t4_read_reg(adap, EPIO_REG(OP)) & BUSY)
+		if (t4_read_reg(adap, EPIO_REG(OP)) & SF_BUSY)
 			return -ETIMEDOUT;
 	}
 #undef EPIO_REG
@@ -2118,7 +2118,7 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 	struct fw_hello_cmd c;
 
 	INIT_CMD(c, HELLO, WRITE);
-	c.err_to_mbasyncnot = htonl(
+	c.err_to_clearinit = htonl(
 		FW_HELLO_CMD_MASTERDIS(master == MASTER_CANT) |
 		FW_HELLO_CMD_MASTERFORCE(master == MASTER_MUST) |
 		FW_HELLO_CMD_MBMASTER(master == MASTER_MUST ? mbox : 0xff) |
@@ -2126,7 +2126,7 @@ int t4_fw_hello(struct adapter *adap, unsigned int mbox, unsigned int evt_mbox,
 
 	ret = t4_wr_mbox(adap, mbox, &c, sizeof(c), &c);
 	if (ret == 0 && state) {
-		u32 v = ntohl(c.err_to_mbasyncnot);
+		u32 v = ntohl(c.err_to_clearinit);
 		if (v & FW_HELLO_CMD_INIT)
 			*state = DEV_STATE_INIT;
 		else if (v & FW_HELLO_CMD_ERR)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
index eb71b82..b760808 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h
@@ -658,6 +658,7 @@ struct ulptx_sgl {
 	__be32 cmd_nsge;
 #define ULPTX_CMD(x) ((x) << 24)
 #define ULPTX_NSGE(x) ((x) << 0)
+#define ULPTX_MORE (1U << 23)
 	__be32 len0;
 	__be64 addr0;
 	struct ulptx_sge_pair sge[0];
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
index 111fc32..6bace75 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_regs.h
@@ -67,7 +67,7 @@
 #define  QID_MASK    0xffff8000U
 #define  QID_SHIFT   15
 #define  QID(x)      ((x) << QID_SHIFT)
-#define  DBPRIO      0x00004000U
+#define  DBPRIO(x)   ((x) << 14)
 #define  PIDX_MASK   0x00003fffU
 #define  PIDX_SHIFT  0
 #define  PIDX(x)     ((x) << PIDX_SHIFT)
@@ -89,7 +89,7 @@
 #define SGE_CONTROL 0x1008
 #define  DCASYSTYPE             0x00080000U
 #define  RXPKTCPLMODE           0x00040000U
-#define  EGRSTATUSPAGESIZE      0x00020000U
+#define  EGRSTATUSPAGESIZE(x)   ((x) << 17)
 #define  PKTSHIFT_MASK          0x00001c00U
 #define  PKTSHIFT_SHIFT         10
 #define  PKTSHIFT(x)            ((x) << PKTSHIFT_SHIFT)
@@ -111,6 +111,13 @@
 #define  HOSTPAGESIZEPF0_MASK   0x0000000fU
 #define  HOSTPAGESIZEPF0_SHIFT  0
 #define  HOSTPAGESIZEPF0(x)     ((x) << HOSTPAGESIZEPF0_SHIFT)
+#define  HOSTPAGESIZEPF1(x)     ((x) << 4)
+#define  HOSTPAGESIZEPF2(x)     ((x) << 8)
+#define  HOSTPAGESIZEPF3(x)     ((x) << 12)
+#define  HOSTPAGESIZEPF4(x)     ((x) << 16)
+#define  HOSTPAGESIZEPF5(x)     ((x) << 20)
+#define  HOSTPAGESIZEPF6(x)     ((x) << 24)
+#define  HOSTPAGESIZEPF7(x)     ((x) << 28)
 
 #define SGE_EGRESS_QUEUES_PER_PAGE_PF 0x1010
 #define  QUEUESPERPAGEPF0_MASK   0x0000000fU
@@ -155,6 +162,14 @@
 #define SGE_INT_ENABLE3 0x1040
 #define SGE_FL_BUFFER_SIZE0 0x1044
 #define SGE_FL_BUFFER_SIZE1 0x1048
+#define SGE_FL_BUFFER_SIZE2 0x104c
+#define SGE_FL_BUFFER_SIZE3 0x1050
+#define SGE_FL_BUFFER_SIZE4 0x1054
+#define SGE_FL_BUFFER_SIZE5 0x1058
+#define SGE_FL_BUFFER_SIZE6 0x105c
+#define SGE_FL_BUFFER_SIZE7 0x1060
+#define SGE_FL_BUFFER_SIZE8 0x1064
+
 #define SGE_INGRESS_RX_THRESHOLD 0x10a0
 #define  THRESHOLD_0_MASK   0x3f000000U
 #define  THRESHOLD_0_SHIFT  24
@@ -173,6 +188,17 @@
 #define  THRESHOLD_3(x)     ((x) << THRESHOLD_3_SHIFT)
 #define  THRESHOLD_3_GET(x) (((x) & THRESHOLD_3_MASK) >> THRESHOLD_3_SHIFT)
 
+#define SGE_DBFIFO_STATUS 0x10a4
+#define  HP_INT_THRESH_SHIFT 28
+#define  HP_INT_THRESH_MASK  0xfU
+#define  HP_INT_THRESH(x)    ((x) << HP_INT_THRESH_SHIFT)
+#define  LP_INT_THRESH_SHIFT 12
+#define  LP_INT_THRESH_MASK  0xfU
+#define  LP_INT_THRESH(x)    ((x) << LP_INT_THRESH_SHIFT)
+
+#define SGE_DOORBELL_CONTROL 0x10a8
+#define  ENABLE_DROP        (1 << 13)
+
 #define SGE_TIMER_VALUE_0_AND_1 0x10b8
 #define  TIMERVALUE0_MASK   0xffff0000U
 #define  TIMERVALUE0_SHIFT  16
@@ -184,7 +210,25 @@
 #define  TIMERVALUE1_GET(x) (((x) & TIMERVALUE1_MASK) >> TIMERVALUE1_SHIFT)
 
 #define SGE_TIMER_VALUE_2_AND_3 0x10bc
+#define  TIMERVALUE2_MASK   0xffff0000U
+#define  TIMERVALUE2_SHIFT  16
+#define  TIMERVALUE2(x)     ((x) << TIMERVALUE2_SHIFT)
+#define  TIMERVALUE2_GET(x) (((x) & TIMERVALUE2_MASK) >> TIMERVALUE2_SHIFT)
+#define  TIMERVALUE3_MASK   0x0000ffffU
+#define  TIMERVALUE3_SHIFT  0
+#define  TIMERVALUE3(x)     ((x) << TIMERVALUE3_SHIFT)
+#define  TIMERVALUE3_GET(x) (((x) & TIMERVALUE3_MASK) >> TIMERVALUE3_SHIFT)
+
 #define SGE_TIMER_VALUE_4_AND_5 0x10c0
+#define  TIMERVALUE4_MASK   0xffff0000U
+#define  TIMERVALUE4_SHIFT  16
+#define  TIMERVALUE4(x)     ((x) << TIMERVALUE4_SHIFT)
+#define  TIMERVALUE4_GET(x) (((x) & TIMERVALUE4_MASK) >> TIMERVALUE4_SHIFT)
+#define  TIMERVALUE5_MASK   0x0000ffffU
+#define  TIMERVALUE5_SHIFT  0
+#define  TIMERVALUE5(x)     ((x) << TIMERVALUE5_SHIFT)
+#define  TIMERVALUE5_GET(x) (((x) & TIMERVALUE5_MASK) >> TIMERVALUE5_SHIFT)
+
 #define SGE_DEBUG_INDEX 0x10cc
 #define SGE_DEBUG_DATA_HIGH 0x10d0
 #define SGE_DEBUG_DATA_LOW 0x10d4
@@ -243,6 +287,10 @@
 #define M_HP_INT_THRESH 0xfU
 #define M_LP_INT_THRESH 0xfU
 
+#define PCIE_PF_CFG 0x40
+#define  AIVEC(x)	((x) << 4)
+#define  AIVEC_MASK	0x3ffU
+
 #define PCIE_PF_CLI 0x44
 #define PCIE_INT_CAUSE 0x3004
 #define  UNXSPLCPLERR  0x20000000U
@@ -287,6 +335,15 @@
 #define  WINDOW(x)       ((x) << WINDOW_SHIFT)
 #define PCIE_MEM_ACCESS_OFFSET 0x306c
 
+#define PCIE_FW 0x30b8
+#define  PCIE_FW_ERR		0x80000000U
+#define  PCIE_FW_INIT		0x40000000U
+#define  PCIE_FW_HALT		0x20000000U
+#define  PCIE_FW_MASTER_VLD	0x00008000U
+#define  PCIE_FW_MASTER(x)	((x) << 12)
+#define  PCIE_FW_MASTER_MASK	0x7
+#define  PCIE_FW_MASTER_GET(x)	(((x) >> 12) & PCIE_FW_MASTER_MASK)
+
 #define PCIE_CORE_UTL_SYSTEM_BUS_AGENT_STATUS 0x5908
 #define  RNPP 0x80000000U
 #define  RPCP 0x20000000U
@@ -384,6 +441,8 @@
 #define EDC_1_BASE_ADDR 0x7980
 
 #define CIM_BOOT_CFG 0x7b00
+#define  UPCRST		0x00000001U
+
 #define  BOOTADDR_MASK 0xffffff00U
 
 #define CIM_PF_MAILBOX_DATA 0x240
@@ -395,6 +454,9 @@
 #define  MBOWNER(x)     ((x) << MBOWNER_SHIFT)
 #define  MBOWNER_GET(x) (((x) & MBOWNER_MASK) >> MBOWNER_SHIFT)
 
+#define CIM_PF_HOST_INT_ENABLE 0x288
+#define  MBMSGRDYINTEN(x) ((x) << 19)
+
 #define CIM_PF_HOST_INT_CAUSE 0x28c
 #define  MBMSGRDYINT 0x00080000U
 
@@ -825,7 +887,7 @@
 
 #define SF_DATA 0x193f8
 #define SF_OP 0x193fc
-#define  BUSY          0x80000000U
+#define  SF_BUSY       0x80000000U
 #define  SF_LOCK       0x00000010U
 #define  SF_CONT       0x00000008U
 #define  BYTECNT_MASK  0x00000006U
@@ -884,6 +946,7 @@
 #define  I2CM       0x00000002U
 #define  CIM        0x00000001U
 
+#define PL_INT_ENABLE 0x19410
 #define PL_INT_MAP0 0x19414
 #define PL_RST 0x19428
 #define  PIORST     0x00000002U
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index ad53f79..034ca39 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -68,6 +68,7 @@ struct fw_wr_hdr {
 };
 
 #define FW_WR_OP(x)	 ((x) << 24)
+#define FW_WR_OP_GET(x)	 (((x) >> 24) & 0xff)
 #define FW_WR_ATOMIC(x)	 ((x) << 23)
 #define FW_WR_FLUSH(x)   ((x) << 22)
 #define FW_WR_COMPL(x)   ((x) << 21)
@@ -155,6 +156,9 @@ struct fw_eth_tx_pkt_vm_wr {
 
 #define FW_CMD_MAX_TIMEOUT 3000
 
+#define FW_CMD_HELLO_TIMEOUT (3 * FW_CMD_MAX_TIMEOUT)
+#define FW_CMD_HELLO_RETRIES 3
+
 enum fw_cmd_opcodes {
 	FW_LDST_CMD                    = 0x01,
 	FW_RESET_CMD                   = 0x03,
@@ -209,6 +213,7 @@ struct fw_cmd_hdr {
 #define FW_CMD_OP(x)		((x) << 24)
 #define FW_CMD_OP_GET(x)        (((x) >> 24) & 0xff)
 #define FW_CMD_REQUEST          (1U << 23)
+#define FW_CMD_REQUEST_GET(x)   (((x) >> 23) & 0x1)
 #define FW_CMD_READ		(1U << 22)
 #define FW_CMD_WRITE		(1U << 21)
 #define FW_CMD_EXEC		(1U << 20)
@@ -216,6 +221,7 @@ struct fw_cmd_hdr {
 #define FW_CMD_RETVAL(x)	((x) << 8)
 #define FW_CMD_RETVAL_GET(x)	(((x) >> 8) & 0xff)
 #define FW_CMD_LEN16(x)         ((x) << 0)
+#define FW_LEN16(fw_struct)	FW_CMD_LEN16(sizeof(fw_struct) / 16)
 
 enum fw_ldst_addrspc {
 	FW_LDST_ADDRSPC_FIRMWARE  = 0x0001,
@@ -228,7 +234,8 @@ enum fw_ldst_addrspc {
 	FW_LDST_ADDRSPC_TP_MIB    = 0x0012,
 	FW_LDST_ADDRSPC_MDIO      = 0x0018,
 	FW_LDST_ADDRSPC_MPS       = 0x0020,
-	FW_LDST_ADDRSPC_FUNC      = 0x0028
+	FW_LDST_ADDRSPC_FUNC      = 0x0028,
+	FW_LDST_ADDRSPC_FUNC_PCIE = 0x0029,
 };
 
 enum fw_ldst_mps_fid {
@@ -290,6 +297,16 @@ struct fw_ldst_cmd {
 			__be64 data0;
 			__be64 data1;
 		} func;
+		struct fw_ldst_pcie {
+			u8 ctrl_to_fn;
+			u8 bnum;
+			u8 r;
+			u8 ext_r;
+			u8 select_naccess;
+			u8 pcie_fn;
+			__be16 nset_pkd;
+			__be32 data[12];
+		} pcie;
 	} u;
 };
 
@@ -299,24 +316,42 @@ struct fw_ldst_cmd {
 #define FW_LDST_CMD_FID(x)	((x) << 15)
 #define FW_LDST_CMD_CTL(x)	((x) << 0)
 #define FW_LDST_CMD_RPLCPF(x)	((x) << 0)
+#define FW_LDST_CMD_LC		(1U << 4)
+#define FW_LDST_CMD_NACCESS(x)	((x) << 0)
+#define FW_LDST_CMD_FN(x)	((x) << 0)
 
 struct fw_reset_cmd {
 	__be32 op_to_write;
 	__be32 retval_len16;
 	__be32 val;
-	__be32 r3;
+	__be32 halt_pkd;
+};
+
+#define FW_RESET_CMD_HALT	(1U << 31)
+
+enum {
+	FW_HELLO_CMD_STAGE_OS		= 0,
+	FW_HELLO_CMD_STAGE_PREOS0	= 1,
+	FW_HELLO_CMD_STAGE_PREOS1	= 2,
+	FW_HELLO_CMD_STAGE_POSTOS	= 3,
 };
 
 struct fw_hello_cmd {
 	__be32 op_to_write;
 	__be32 retval_len16;
-	__be32 err_to_mbasyncnot;
-#define FW_HELLO_CMD_ERR	    (1U << 31)
-#define FW_HELLO_CMD_INIT	    (1U << 30)
-#define FW_HELLO_CMD_MASTERDIS(x)   ((x) << 29)
-#define FW_HELLO_CMD_MASTERFORCE(x) ((x) << 28)
-#define FW_HELLO_CMD_MBMASTER(x)    ((x) << 24)
-#define FW_HELLO_CMD_MBASYNCNOT(x)  ((x) << 20)
+	__be32 err_to_clearinit;
+#define FW_HELLO_CMD_ERR	        (1U << 31)
+#define FW_HELLO_CMD_INIT	        (1U << 30)
+#define FW_HELLO_CMD_MASTERDIS(x)	((x) << 29)
+#define FW_HELLO_CMD_MASTERFORCE(x)	((x) << 28)
+#define FW_HELLO_CMD_MBMASTER_MASK	0xf
+#define FW_HELLO_CMD_MBMASTER(x)	((x) << 24)
+#define FW_HELLO_CMD_MBMASTER_GET(x)	\
+		(((x) >> 24) & FW_HELLO_CMD_MBMASTER_MASK)
+#define FW_HELLO_CMD_MBASYNCNOTINT(x)	((x) << 23)
+#define FW_HELLO_CMD_MBASYNCNOT(x)	((x) << 20)
+#define FW_HELLO_CMD_STAGE(x)		((x) << 17)
+#define FW_HELLO_CMD_CLEARINIT		(1U << 16)
 	__be32 fwrev;
 };
 
@@ -399,11 +434,20 @@ enum fw_caps_config_iscsi {
 enum fw_caps_config_fcoe {
 	FW_CAPS_CONFIG_FCOE_INITIATOR	= 0x00000001,
 	FW_CAPS_CONFIG_FCOE_TARGET	= 0x00000002,
+	FW_CAPS_CONFIG_FCOE_CTRL_OFLD	= 0x00000004,
+};
+
+enum fw_memtype_cf {
+	FW_MEMTYPE_CF_EDC0		= 0x0,
+	FW_MEMTYPE_CF_EDC1		= 0x1,
+	FW_MEMTYPE_CF_EXTMEM		= 0x2,
+	FW_MEMTYPE_CF_FLASH		= 0x4,
+	FW_MEMTYPE_CF_INTERNAL		= 0x5,
 };
 
 struct fw_caps_config_cmd {
 	__be32 op_to_write;
-	__be32 retval_len16;
+	__be32 cfvalid_to_len16;
 	__be32 r2;
 	__be32 hwmbitmap;
 	__be16 nbmcaps;
@@ -416,10 +460,15 @@ struct fw_caps_config_cmd {
 	__be16 r4;
 	__be16 iscsicaps;
 	__be16 fcoecaps;
-	__be32 r5;
-	__be64 r6;
+	__be32 cfcsum;
+	__be32 finiver;
+	__be32 finicsum;
 };
 
+#define FW_CAPS_CONFIG_CMD_CFVALID (1U << 27)
+#define FW_CAPS_CONFIG_CMD_MEMTYPE_CF(x) ((x) << 24)
+#define FW_CAPS_CONFIG_CMD_MEMADDR64K_CF(x) ((x) << 16)
+
 /*
  * params command mnemonics
  */
@@ -451,6 +500,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_INTVER_FCOE = 0x0A,
 	FW_PARAMS_PARAM_DEV_FWREV = 0x0B,
 	FW_PARAMS_PARAM_DEV_TPREV = 0x0C,
+	FW_PARAMS_PARAM_DEV_CF = 0x0D,
 };
 
 /*
@@ -511,6 +561,9 @@ enum fw_params_param_dmaq {
 #define FW_PARAMS_PARAM_Z(x)   ((x) << 0)
 #define FW_PARAMS_PARAM_XYZ(x) ((x) << 0)
 #define FW_PARAMS_PARAM_YZ(x)  ((x) << 0)
+#define FW_PARAMS_PARAM_X_GET(x) (((x) >> 16) & 0xff)
+#define FW_PARAMS_PARAM_Y_GET(x) (((x) >> 8) & 0xff)
+#define FW_PARAMS_PARAM_Z_GET(x) (((x) >> 0) & 0xff)
 
 struct fw_params_cmd {
 	__be32 op_to_vfn;
@@ -648,8 +701,8 @@ struct fw_iq_cmd {
 #define FW_IQ_CMD_FL0FETCHRO(x) ((x) << 6)
 #define FW_IQ_CMD_FL0HOSTFCMODE(x) ((x) << 4)
 #define FW_IQ_CMD_FL0CPRIO(x) ((x) << 3)
-#define FW_IQ_CMD_FL0PADEN (1U << 2)
-#define FW_IQ_CMD_FL0PACKEN (1U << 1)
+#define FW_IQ_CMD_FL0PADEN(x) ((x) << 2)
+#define FW_IQ_CMD_FL0PACKEN(x) ((x) << 1)
 #define FW_IQ_CMD_FL0CONGEN (1U << 0)
 
 #define FW_IQ_CMD_FL0DCAEN(x) ((x) << 15)
@@ -1137,6 +1190,14 @@ enum fw_port_dcb_cfg_rc {
 	FW_PORT_DCB_CFG_ERROR	= 0x1
 };
 
+enum fw_port_dcb_type {
+	FW_PORT_DCB_TYPE_PGID		= 0x00,
+	FW_PORT_DCB_TYPE_PGRATE		= 0x01,
+	FW_PORT_DCB_TYPE_PRIORATE	= 0x02,
+	FW_PORT_DCB_TYPE_PFC		= 0x03,
+	FW_PORT_DCB_TYPE_APP_ID		= 0x04,
+};
+
 struct fw_port_cmd {
 	__be32 op_to_portid;
 	__be32 action_to_len16;
@@ -1204,6 +1265,7 @@ struct fw_port_cmd {
 #define FW_PORT_CMD_TXIPG(x) ((x) << 19)
 
 #define FW_PORT_CMD_LSTATUS (1U << 31)
+#define FW_PORT_CMD_LSTATUS_GET(x) (((x) >> 31) & 0x1)
 #define FW_PORT_CMD_LSPEED(x) ((x) << 24)
 #define FW_PORT_CMD_LSPEED_GET(x) (((x) >> 24) & 0x3f)
 #define FW_PORT_CMD_TXPAUSE (1U << 23)
@@ -1252,6 +1314,9 @@ enum fw_port_module_type {
 	FW_PORT_MOD_TYPE_TWINAX_PASSIVE,
 	FW_PORT_MOD_TYPE_TWINAX_ACTIVE,
 	FW_PORT_MOD_TYPE_LRM,
+	FW_PORT_MOD_TYPE_ERROR		= FW_PORT_CMD_MODTYPE_MASK - 3,
+	FW_PORT_MOD_TYPE_UNKNOWN	= FW_PORT_CMD_MODTYPE_MASK - 2,
+	FW_PORT_MOD_TYPE_NOTSUPPORTED	= FW_PORT_CMD_MODTYPE_MASK - 1,
 
 	FW_PORT_MOD_TYPE_NONE = FW_PORT_CMD_MODTYPE_MASK
 };
@@ -1613,7 +1678,11 @@ struct fw_hdr {
 	u8 intfver_iscsi;
 	u8 intfver_fcoe;
 	u8 reserved2;
-	__be32  reserved3[27];
+	__u32   reserved3;
+	__u32   reserved4;
+	__u32   reserved5;
+	__be32  flags;
+	__be32  reserved6[23];
 };
 
 #define FW_HDR_FW_VER_MAJOR_GET(x) (((x) >> 24) & 0xff)
@@ -1621,6 +1690,11 @@ struct fw_hdr {
 #define FW_HDR_FW_VER_MICRO_GET(x) (((x) >> 8) & 0xff)
 #define FW_HDR_FW_VER_BUILD_GET(x) (((x) >> 0) & 0xff)
 
+enum fw_hdr_flags {
+	FW_HDR_FLAGS_RESET_HALT	= 0x00000001,
+};
+
+
 #define S_FW_CMD_OP 24
 #define V_FW_CMD_OP(x) ((x) << S_FW_CMD_OP)
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 8877fbf..33fc1ca 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -536,7 +536,7 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
 	if (fl->pend_cred >= FL_PER_EQ_UNIT) {
 		wmb();
 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-			     DBPRIO |
+			     DBPRIO(1) |
 			     QID(fl->cntxt_id) |
 			     PIDX(fl->pend_cred / FL_PER_EQ_UNIT));
 		fl->pend_cred %= FL_PER_EQ_UNIT;
@@ -952,7 +952,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
 	 * Warn if we write doorbells with the wrong priority and write
 	 * descriptors before telling HW.
 	 */
-	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO);
+	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO(1));
 	wmb();
 	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
 		     QID(tq->cntxt_id) | PIDX(n));
@@ -2126,8 +2126,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
 		cmd.iqns_to_fl0congen =
 			cpu_to_be32(
 				FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) |
-				FW_IQ_CMD_FL0PACKEN |
-				FW_IQ_CMD_FL0PADEN);
+				FW_IQ_CMD_FL0PACKEN(1) |
+				FW_IQ_CMD_FL0PADEN(1));
 		cmd.fl0dcaen_to_fl0cidxfthresh =
 			cpu_to_be16(
 				FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) |
@@ -2431,7 +2431,8 @@ int t4vf_sge_init(struct adapter *adapter)
 	 */
 	if (fl1)
 		FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT;
-	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64);
+	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE(1)) ?
+		    128 : 64);
 	PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control);
 	FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
 			 SGE_INGPADBOUNDARY_SHIFT);
-- 
1.7.1

^ permalink raw reply related

* [V4 PATCH 6/8] csiostor: Chelsio FCoE offload driver submission (sources part 3).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains code to implement the local and remote node port
functionality. It includes tracking the firmware events for changes to
the states of these ports.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/scsi/csiostor/csio_lnode.c | 2148 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_rnode.c |  889 +++++++++++++++
 2 files changed, 3037 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_lnode.c
 create mode 100644 drivers/scsi/csiostor/csio_rnode.c

diff --git a/drivers/scsi/csiostor/csio_lnode.c b/drivers/scsi/csiostor/csio_lnode.c
new file mode 100644
index 0000000..24f38a2
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_lnode.c
@@ -0,0 +1,2148 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/utsname.h>
+#include <scsi/scsi_transport_fc.h>
+#include <asm/unaligned.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_fs.h>
+#include <scsi/fc/fc_gs.h>
+#include <scsi/fc/fc_ms.h>
+
+#include "csio_hw.h"
+#include "csio_mb.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+
+int csio_fcoe_rnodes = 512;
+int csio_fdmi_enable = 1;
+
+#define PORT_ID_PTR(_x)         ((uint8_t *)(&(_x)) + 1)
+
+/* Lnode SM declarations */
+static void csio_lns_uninit(struct csio_lnode *, enum csio_ln_ev);
+static void csio_lns_online(struct csio_lnode *, enum csio_ln_ev);
+static void csio_lns_ready(struct csio_lnode *, enum csio_ln_ev);
+static void csio_lns_offline(struct csio_lnode *, enum csio_ln_ev);
+
+static int csio_ln_mgmt_submit_req(struct csio_ioreq *,
+		void (*io_cbfn) (struct csio_hw *, struct csio_ioreq *),
+		enum fcoe_cmn_type, struct csio_dma_buf *, uint32_t);
+
+/* LN event mapping */
+static enum csio_ln_ev fwevt_to_lnevt[] = {
+	CSIO_LNE_NONE,		/* None */
+	CSIO_LNE_NONE,		/* PLOGI_ACC_RCVD  */
+	CSIO_LNE_NONE,		/* PLOGI_RJT_RCVD  */
+	CSIO_LNE_NONE,		/* PLOGI_RCVD	   */
+	CSIO_LNE_NONE,		/* PLOGO_RCVD	   */
+	CSIO_LNE_NONE,		/* PRLI_ACC_RCVD   */
+	CSIO_LNE_NONE,		/* PRLI_RJT_RCVD   */
+	CSIO_LNE_NONE,		/* PRLI_RCVD	   */
+	CSIO_LNE_NONE,		/* PRLO_RCVD	   */
+	CSIO_LNE_NONE,		/* NPORT_ID_CHGD   */
+	CSIO_LNE_LOGO,		/* FLOGO_RCVD	   */
+	CSIO_LNE_LOGO,		/* CLR_VIRT_LNK_RCVD */
+	CSIO_LNE_FAB_INIT_DONE,/* FLOGI_ACC_RCVD   */
+	CSIO_LNE_NONE,		/* FLOGI_RJT_RCVD   */
+	CSIO_LNE_FAB_INIT_DONE,/* FDISC_ACC_RCVD   */
+	CSIO_LNE_NONE,		/* FDISC_RJT_RCVD   */
+	CSIO_LNE_NONE,		/* FLOGI_TMO_MAX_RETRY */
+	CSIO_LNE_NONE,		/* IMPL_LOGO_ADISC_ACC */
+	CSIO_LNE_NONE,		/* IMPL_LOGO_ADISC_RJT */
+	CSIO_LNE_NONE,		/* IMPL_LOGO_ADISC_CNFLT */
+	CSIO_LNE_NONE,		/* PRLI_TMO		*/
+	CSIO_LNE_NONE,		/* ADISC_TMO		*/
+	CSIO_LNE_NONE,		/* RSCN_DEV_LOST */
+	CSIO_LNE_NONE,		/* SCR_ACC_RCVD */
+	CSIO_LNE_NONE,		/* ADISC_RJT_RCVD */
+	CSIO_LNE_NONE,		/* LOGO_SNT */
+	CSIO_LNE_NONE,		/* PROTO_ERR_IMPL_LOGO */
+};
+
+#define CSIO_FWE_TO_LNE(_evt)	(((_evt) > PROTO_ERR_IMPL_LOGO) ?	\
+						CSIO_LNE_NONE :	\
+						fwevt_to_lnevt[(_evt)])
+
+#define csio_ct_rsp(cp)		(((struct fc_ct_hdr *)(cp))->ct_cmd)
+#define csio_ct_reason(cp)	(((struct fc_ct_hdr *)(cp))->ct_reason)
+#define csio_ct_expl(cp)	(((struct fc_ct_hdr *)(cp))->ct_explan)
+#define csio_ct_get_pld(cp)	((void *)(((uint8_t *)(cp)) + FC_CT_HDR_LEN))
+
+/*
+ * csio_ln_lookup_by_portid - lookup lnode using given portid.
+ * @hw: HW module
+ * @portid: port-id.
+ *
+ * If found, returns lnode matching given portid otherwise returns NULL.
+ */
+static struct csio_lnode *
+csio_ln_lookup_by_portid(struct csio_hw *hw, uint8_t portid)
+{
+	struct csio_lnode *ln = hw->rln;
+	struct list_head *tmp;
+
+	/* Match siblings lnode with portid */
+	list_for_each(tmp, &hw->sln_head) {
+		ln = (struct csio_lnode *) tmp;
+		if (ln->portid == portid)
+			return ln;
+	}
+
+	return NULL;
+}
+
+/*
+ * csio_ln_lookup_by_vnpi - Lookup lnode using given vnp id.
+ * @hw - HW module
+ * @vnp_id - vnp index.
+ * Returns - If found, returns lnode matching given vnp id
+ * otherwise returns NULL.
+ */
+static struct csio_lnode *
+csio_ln_lookup_by_vnpi(struct csio_hw *hw, uint32_t vnp_id)
+{
+	struct list_head *tmp1, *tmp2;
+	struct csio_lnode *sln = NULL, *cln = NULL;
+
+	if (list_empty(&hw->sln_head)) {
+		CSIO_INC_STATS(hw, n_lnlkup_miss);
+		return NULL;
+	}
+	/* Traverse sibling lnodes */
+	list_for_each(tmp1, &hw->sln_head) {
+		sln = (struct csio_lnode *) tmp1;
+
+		/* Match sibling lnode */
+		if (sln->vnp_flowid == vnp_id)
+			return sln;
+
+		if (list_empty(&sln->cln_head))
+			continue;
+
+		/* Traverse children lnodes */
+		list_for_each(tmp2, &sln->cln_head) {
+			cln = (struct csio_lnode *) tmp2;
+
+			if (cln->vnp_flowid == vnp_id)
+				return cln;
+		}
+	}
+	CSIO_INC_STATS(hw, n_lnlkup_miss);
+	return NULL;
+}
+
+/**
+ * csio_lnode_lookup_by_wwpn - Lookup lnode using given wwpn.
+ * @hw:		HW module.
+ * @wwpn:	WWPN.
+ *
+ * If found, returns lnode matching given wwpn, returns NULL otherwise.
+ */
+struct csio_lnode *
+csio_lnode_lookup_by_wwpn(struct csio_hw *hw, uint8_t *wwpn)
+{
+	struct list_head *tmp1, *tmp2;
+	struct csio_lnode *sln = NULL, *cln = NULL;
+
+	if (list_empty(&hw->sln_head)) {
+		CSIO_INC_STATS(hw, n_lnlkup_miss);
+		return NULL;
+	}
+	/* Traverse sibling lnodes */
+	list_for_each(tmp1, &hw->sln_head) {
+		sln = (struct csio_lnode *) tmp1;
+
+		/* Match sibling lnode */
+		if (!memcmp(csio_ln_wwpn(sln), wwpn, 8))
+			return sln;
+
+		if (list_empty(&sln->cln_head))
+			continue;
+
+		/* Traverse children lnodes */
+		list_for_each(tmp2, &sln->cln_head) {
+			cln = (struct csio_lnode *) tmp2;
+
+			if (!memcmp(csio_ln_wwpn(cln), wwpn, 8))
+				return cln;
+		}
+	}
+	return NULL;
+}
+
+/* FDMI */
+static void
+csio_fill_ct_iu(void *buf, uint8_t type, uint8_t sub_type, uint16_t op)
+{
+	struct fc_ct_hdr *cmd = (struct fc_ct_hdr *)buf;
+	cmd->ct_rev = FC_CT_REV;
+	cmd->ct_fs_type = type;
+	cmd->ct_fs_subtype = sub_type;
+	cmd->ct_cmd = op;
+}
+
+static int
+csio_hostname(uint8_t *buf, size_t buf_len)
+{
+	if (snprintf(buf, buf_len, "%s", init_utsname()->nodename) > 0)
+		return 0;	/* copy is bounded by buf_len (may truncate) */
+	return -1;		/* empty node name: nothing to report */
+}
+
+static int
+csio_osname(uint8_t *buf, size_t buf_len)
+{
+	/*
+	 * Emit "<sysname> <release> <version>" with a single bounded
+	 * snprintf(): the output can never run past buf_len (it is truncated
+	 * and NUL-terminated instead), and the separating spaces are kept.
+	 * Do not build this with chained strcpy() calls: they are unbounded
+	 * and each one overwrites the space written just before it.
+	 */
+	snprintf(buf, buf_len, "%s %s %s",
+		 init_utsname()->sysname,
+		 init_utsname()->release,
+		 init_utsname()->version);
+	return 0;
+}
+
+static inline void
+csio_append_attrib(uint8_t **ptr, uint16_t type, uint8_t *val, uint16_t len)
+{
+	struct fc_fdmi_attr_entry *ae = (struct fc_fdmi_attr_entry *)*ptr;
+	uint16_t ae_len = (len + 4 + 3) & ~3;	/* type+len hdr, 4-byte aligned */
+
+	ae->type = htons(type);
+	ae->len = htons(ae_len);
+	memset(ae->value, 0, ae_len - 4);	/* zero value area incl. padding */
+	memcpy(ae->value, val, len);		/* copy only the caller's bytes */
+	*ptr += ae_len;				/* advance to the next entry */
+}
+
+/*
+ * csio_ln_fdmi_done - FDMI registration completion
+ * @hw: HW context
+ * @fdmi_req: fdmi request
+ */
+static void
+csio_ln_fdmi_done(struct csio_hw *hw, struct csio_ioreq *fdmi_req)
+{
+	void *cmd;
+	struct csio_lnode *ln = fdmi_req->lnode;
+
+	if (fdmi_req->wr_status != FW_SUCCESS) {
+		csio_ln_err(ln, "WR error:%x in processing fdmi rpa cmd\n",
+			    fdmi_req->wr_status);
+		CSIO_INC_STATS(ln, n_fdmi_err);
+	}
+
+	cmd = fdmi_req->dma_buf.vaddr;
+	if (ntohs(csio_ct_rsp(cmd)) != FC_FS_ACC) {
+		csio_ln_dbg(ln, "fdmi rpa cmd rejected reason %x expl %x\n",
+			    csio_ct_reason(cmd), csio_ct_expl(cmd));
+	}
+}
+
+/*
+ * csio_ln_fdmi_rhba_cbfn - RHBA completion
+ * @hw: HW context
+ * @fdmi_req: fdmi request
+ */
+static void
+csio_ln_fdmi_rhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req)
+{
+	void *cmd;
+	uint8_t *pld;
+	uint32_t len = 0;
+	struct csio_lnode *ln = fdmi_req->lnode;
+	struct fs_fdmi_attrs *attrib_blk;
+	struct fc_fdmi_port_name *port_name;
+	uint8_t buf[64];
+	uint32_t val;
+	uint8_t *fc4_type;
+
+	if (fdmi_req->wr_status != FW_SUCCESS) {
+		csio_ln_err(ln, "WR error:%x in processing fdmi rhba cmd\n",
+			    fdmi_req->wr_status);
+		CSIO_INC_STATS(ln, n_fdmi_err);
+	}
+
+	cmd = fdmi_req->dma_buf.vaddr;
+	if (ntohs(csio_ct_rsp(cmd)) != FC_FS_ACC) {
+		csio_ln_dbg(ln, "fdmi rhba cmd rejected reason %x expl %x\n",
+			    csio_ct_reason(cmd), csio_ct_expl(cmd));
+	}
+
+	if (!csio_is_rnode_ready(fdmi_req->rnode)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		return;
+	}
+
+	/* Prepare CT hdr for RPA cmd */
+	memset(cmd, 0, FC_CT_HDR_LEN);
+	csio_fill_ct_iu(cmd, FC_FST_MGMT, FC_FDMI_SUBTYPE, htons(FC_FDMI_RPA));
+
+	/* Prepare RPA payload */
+	pld = (uint8_t *)csio_ct_get_pld(cmd);
+	port_name = (struct fc_fdmi_port_name *)pld;
+	memcpy(&port_name->portname, csio_ln_wwpn(ln), 8);
+	pld += sizeof(*port_name);
+
+	/* Start appending Port attributes */
+	attrib_blk = (struct fs_fdmi_attrs *)pld;
+	attrib_blk->numattrs = 0;
+	len += sizeof(attrib_blk->numattrs);
+	pld += sizeof(attrib_blk->numattrs);
+
+	fc4_type = &buf[0];
+	memset(fc4_type, 0, FC_FDMI_PORT_ATTR_FC4TYPES_LEN);
+	fc4_type[2] = 1;
+	fc4_type[7] = 1;
+	csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_FC4TYPES,
+			   fc4_type, FC_FDMI_PORT_ATTR_FC4TYPES_LEN);
+	attrib_blk->numattrs++;
+	val = htonl(FC_PORTSPEED_1GBIT | FC_PORTSPEED_10GBIT);
+	csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_SUPPORTEDSPEED,
+			   (uint8_t *)&val,
+			   FC_FDMI_PORT_ATTR_SUPPORTEDSPEED_LEN);
+	attrib_blk->numattrs++;
+
+	if (hw->pport[ln->portid].link_speed == FW_PORT_CAP_SPEED_1G)
+		val = htonl(FC_PORTSPEED_1GBIT);
+	else if (hw->pport[ln->portid].link_speed == FW_PORT_CAP_SPEED_10G)
+		val = htonl(FC_PORTSPEED_10GBIT);
+	else
+		val = htonl(CSIO_HBA_PORTSPEED_UNKNOWN);
+	csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_CURRENTPORTSPEED,
+			   (uint8_t *)&val,
+			   FC_FDMI_PORT_ATTR_CURRENTPORTSPEED_LEN);
+	attrib_blk->numattrs++;
+
+	val = htonl(ln->ln_sparm.csp.sp_bb_data);
+	csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_MAXFRAMESIZE,
+			   (uint8_t *)&val, FC_FDMI_PORT_ATTR_MAXFRAMESIZE_LEN);
+	attrib_blk->numattrs++;
+
+	strcpy(buf, "csiostor");
+	csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_OSDEVICENAME, buf,
+			   (uint16_t)strlen(buf));
+	attrib_blk->numattrs++;
+
+	if (!csio_hostname(buf, sizeof(buf))) {
+		csio_append_attrib(&pld, FC_FDMI_PORT_ATTR_HOSTNAME,
+				   buf, (uint16_t)strlen(buf));
+		attrib_blk->numattrs++;
+	}
+	attrib_blk->numattrs = ntohl(attrib_blk->numattrs);
+	len = (uint32_t)(pld - (uint8_t *)cmd);
+
+	/* Submit FDMI RPA request */
+	spin_lock_irq(&hw->lock);
+	if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_done,
+				FCOE_CT, &fdmi_req->dma_buf, len)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		csio_ln_err(ln, "Failed to issue fdmi rpa req\n");
+	}
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_ln_fdmi_dprt_cbfn - DPRT completion
+ * @hw: HW context
+ * @fdmi_req: fdmi request
+ */
+static void
+csio_ln_fdmi_dprt_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req)
+{
+	void *cmd;
+	uint8_t *pld;
+	uint32_t len = 0;
+	uint32_t maxpayload = htonl(65536);
+	struct fc_fdmi_hba_identifier *hbaid;
+	struct csio_lnode *ln = fdmi_req->lnode;
+	struct fc_fdmi_rpl *reg_pl;
+	struct fs_fdmi_attrs *attrib_blk;
+	uint8_t buf[64];
+
+	if (fdmi_req->wr_status != FW_SUCCESS) {
+		csio_ln_err(ln, "WR error:%x in processing fdmi dprt cmd\n",
+			    fdmi_req->wr_status);
+		CSIO_INC_STATS(ln, n_fdmi_err);
+	}
+
+	if (!csio_is_rnode_ready(fdmi_req->rnode)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		return;
+	}
+	cmd = fdmi_req->dma_buf.vaddr;
+	if (ntohs(csio_ct_rsp(cmd)) != FC_FS_ACC) {
+		csio_ln_dbg(ln, "fdmi dprt cmd rejected reason %x expl %x\n",
+			    csio_ct_reason(cmd), csio_ct_expl(cmd));
+	}
+
+	/* Prepare CT hdr for RHBA cmd */
+	memset(cmd, 0, FC_CT_HDR_LEN);
+	csio_fill_ct_iu(cmd, FC_FST_MGMT, FC_FDMI_SUBTYPE, htons(FC_FDMI_RHBA));
+	len = FC_CT_HDR_LEN;
+
+	/* Prepare RHBA payload */
+	pld = (uint8_t *)csio_ct_get_pld(cmd);
+	hbaid = (struct fc_fdmi_hba_identifier *)pld;
+	memcpy(&hbaid->id, csio_ln_wwpn(ln), 8); /* HBA identifer */
+	pld += sizeof(*hbaid);
+
+	/* Register one port per hba */
+	reg_pl = (struct fc_fdmi_rpl *)pld;
+	reg_pl->numport = ntohl(1);
+	memcpy(&reg_pl->port[0].portname, csio_ln_wwpn(ln), 8);
+	pld += sizeof(*reg_pl);
+
+	/* Start appending HBA attributes hba */
+	attrib_blk = (struct fs_fdmi_attrs *)pld;
+	attrib_blk->numattrs = 0;
+	len += sizeof(attrib_blk->numattrs);
+	pld += sizeof(attrib_blk->numattrs);
+
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_NODENAME, csio_ln_wwnn(ln),
+			   FC_FDMI_HBA_ATTR_NODENAME_LEN);
+	attrib_blk->numattrs++;
+
+	memset(buf, 0, sizeof(buf));
+
+	strcpy(buf, "Chelsio Communications");
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_MANUFACTURER, buf,
+			   (uint16_t)strlen(buf));
+	attrib_blk->numattrs++;
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_SERIALNUMBER,
+			   hw->vpd.sn, (uint16_t)sizeof(hw->vpd.sn));
+	attrib_blk->numattrs++;
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_MODEL, hw->vpd.id,
+			   (uint16_t)sizeof(hw->vpd.id));
+	attrib_blk->numattrs++;
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_MODELDESCRIPTION,
+			   hw->model_desc, (uint16_t)strlen(hw->model_desc));
+	attrib_blk->numattrs++;
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_HARDWAREVERSION,
+			   hw->hw_ver, (uint16_t)sizeof(hw->hw_ver));
+	attrib_blk->numattrs++;
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_FIRMWAREVERSION,
+			   hw->fwrev_str, (uint16_t)strlen(hw->fwrev_str));
+	attrib_blk->numattrs++;
+
+	if (!csio_osname(buf, sizeof(buf))) {
+		csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_OSNAMEVERSION,
+				   buf, (uint16_t)strlen(buf));
+		attrib_blk->numattrs++;
+	}
+
+	csio_append_attrib(&pld, FC_FDMI_HBA_ATTR_MAXCTPAYLOAD,
+			   (uint8_t *)&maxpayload,
+			   FC_FDMI_HBA_ATTR_MAXCTPAYLOAD_LEN);
+	len = (uint32_t)(pld - (uint8_t *)cmd);
+	attrib_blk->numattrs++;
+	attrib_blk->numattrs = ntohl(attrib_blk->numattrs);
+
+	/* Submit FDMI RHBA request */
+	spin_lock_irq(&hw->lock);
+	if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_rhba_cbfn,
+				FCOE_CT, &fdmi_req->dma_buf, len)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		csio_ln_err(ln, "Failed to issue fdmi rhba req\n");
+	}
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_ln_fdmi_dhba_cbfn - DHBA completion
+ * @hw: HW context
+ * @fdmi_req: fdmi request
+ */
+static void
+csio_ln_fdmi_dhba_cbfn(struct csio_hw *hw, struct csio_ioreq *fdmi_req)
+{
+	struct csio_lnode *ln = fdmi_req->lnode;
+	void *cmd;
+	struct fc_fdmi_port_name *port_name;
+	uint32_t len;
+
+	if (fdmi_req->wr_status != FW_SUCCESS) {
+		csio_ln_err(ln, "WR error:%x in processing fdmi dhba cmd\n",
+			    fdmi_req->wr_status);
+		CSIO_INC_STATS(ln, n_fdmi_err);
+	}
+
+	if (!csio_is_rnode_ready(fdmi_req->rnode)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		return;
+	}
+	cmd = fdmi_req->dma_buf.vaddr;
+	if (ntohs(csio_ct_rsp(cmd)) != FC_FS_ACC) {
+		csio_ln_dbg(ln, "fdmi dhba cmd rejected reason %x expl %x\n",
+			    csio_ct_reason(cmd), csio_ct_expl(cmd));
+	}
+
+	/* Send FDMI cmd to de-register any Port attributes if registered
+	 * before
+	 */
+
+	/* Prepare FDMI DPRT cmd */
+	memset(cmd, 0, FC_CT_HDR_LEN);
+	csio_fill_ct_iu(cmd, FC_FST_MGMT, FC_FDMI_SUBTYPE, htons(FC_FDMI_DPRT));
+	len = FC_CT_HDR_LEN;
+	port_name = (struct fc_fdmi_port_name *)csio_ct_get_pld(cmd);
+	memcpy(&port_name->portname, csio_ln_wwpn(ln), 8);
+	len += sizeof(*port_name);
+
+	/* Submit FDMI request */
+	spin_lock_irq(&hw->lock);
+	if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_dprt_cbfn,
+				FCOE_CT, &fdmi_req->dma_buf, len)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		csio_ln_err(ln, "Failed to issue fdmi dprt req\n");
+	}
+	spin_unlock_irq(&hw->lock);
+}
+
+/**
+ * csio_ln_fdmi_start - Start an FDMI request.
+ * @ln:		lnode
+ * @context:	session context
+ *
+ * Issued with lock held.
+ */
+int
+csio_ln_fdmi_start(struct csio_lnode *ln, void *context)
+{
+	struct csio_ioreq *fdmi_req;
+	struct csio_rnode *fdmi_rn = (struct csio_rnode *)context;
+	void *cmd;
+	struct fc_fdmi_hba_identifier *hbaid;
+	uint32_t len;
+
+	if (!(ln->flags & CSIO_LNF_FDMI_ENABLE))
+		return -EPROTONOSUPPORT;
+
+	if (!csio_is_rnode_ready(fdmi_rn))
+		CSIO_INC_STATS(ln, n_fdmi_err);
+
+	/* Send FDMI cmd to de-register any HBA attributes if registered
+	 * before
+	 */
+
+	fdmi_req = ln->mgmt_req;
+	fdmi_req->lnode = ln;
+	fdmi_req->rnode = fdmi_rn;
+
+	/* Prepare FDMI DHBA cmd */
+	cmd = fdmi_req->dma_buf.vaddr;
+	memset(cmd, 0, FC_CT_HDR_LEN);
+	csio_fill_ct_iu(cmd, FC_FST_MGMT, FC_FDMI_SUBTYPE, htons(FC_FDMI_DHBA));
+	len = FC_CT_HDR_LEN;
+
+	hbaid = (struct fc_fdmi_hba_identifier *)csio_ct_get_pld(cmd);
+	memcpy(&hbaid->id, csio_ln_wwpn(ln), 8);
+	len += sizeof(*hbaid);
+
+	/* Submit FDMI request */
+	if (csio_ln_mgmt_submit_req(fdmi_req, csio_ln_fdmi_dhba_cbfn,
+					FCOE_CT, &fdmi_req->dma_buf, len)) {
+		CSIO_INC_STATS(ln, n_fdmi_err);
+		csio_ln_err(ln, "Failed to issue fdmi dhba req\n");
+	}
+
+	return 0;
+}
+
+/*
+ * csio_ln_vnp_read_cbfn - vnp read completion handler.
+ * @hw: HW module
+ * @mbp: Mailbox command holding the firmware's VNP read response.
+ *
+ * Reads vnp response and updates ln parameters.
+ */
+static void
+csio_ln_vnp_read_cbfn(struct csio_hw *hw, struct csio_mb *mbp)
+{
+	struct csio_lnode *ln = ((struct csio_lnode *)mbp->priv);
+	struct fw_fcoe_vnp_cmd *rsp = (struct fw_fcoe_vnp_cmd *)(mbp->mb);
+	struct fc_els_csp *csp;
+	struct fc_els_cssp *clsp;
+	enum fw_retval retval;
+
+	spin_lock_irq(&hw->lock);
+
+	retval = FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16));
+	if (retval != FW_SUCCESS) {
+		csio_err(hw, "FCOE VNP read cmd returned error:0x%x\n", retval);
+		spin_unlock_irq(&hw->lock);
+		mempool_free(mbp, hw->mb_mempool);
+		return;
+	}
+
+	memcpy(ln->mac, rsp->vnport_mac, sizeof(ln->mac));
+	memcpy(&ln->nport_id, &rsp->vnport_mac[3],
+			sizeof(uint8_t)*3);
+	ln->nport_id = ntohl(ln->nport_id);
+	ln->nport_id = ln->nport_id>>8;
+
+	/* Update WWNs */
+	/*
+	 * This may look like a duplication of what csio_fcoe_enable_link()
+	 * does, but is absolutely necessary if the vnpi changes between
+	 * a FCOE LINK UP and FCOE LINK DOWN.
+	 */
+	memcpy(csio_ln_wwnn(ln), rsp->vnport_wwnn, 8);
+	memcpy(csio_ln_wwpn(ln), rsp->vnport_wwpn, 8);
+
+	/* Copy common sparam */
+	csp = (struct fc_els_csp *)rsp->cmn_srv_parms;
+	ln->ln_sparm.csp.sp_hi_ver = csp->sp_hi_ver;
+	ln->ln_sparm.csp.sp_lo_ver = csp->sp_lo_ver;
+	ln->ln_sparm.csp.sp_bb_cred = ntohs(csp->sp_bb_cred);
+	ln->ln_sparm.csp.sp_features = ntohs(csp->sp_features);
+	ln->ln_sparm.csp.sp_bb_data = ntohs(csp->sp_bb_data);
+	ln->ln_sparm.csp.sp_r_a_tov = ntohl(csp->sp_r_a_tov);
+	ln->ln_sparm.csp.sp_e_d_tov = ntohl(csp->sp_e_d_tov);
+
+	/* Copy word 0 & word 1 of class sparam */
+	clsp = (struct fc_els_cssp *)rsp->clsp_word_0_1;
+	ln->ln_sparm.clsp[2].cp_class = ntohs(clsp->cp_class);
+	ln->ln_sparm.clsp[2].cp_init = ntohs(clsp->cp_init);
+	ln->ln_sparm.clsp[2].cp_recip = ntohs(clsp->cp_recip);
+	ln->ln_sparm.clsp[2].cp_rdfs = ntohs(clsp->cp_rdfs);
+
+	spin_unlock_irq(&hw->lock);
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	/* Send an event to update local attribs */
+	csio_lnode_async_event(ln, CSIO_LN_FC_ATTRIB_UPDATE);
+}
+
+/*
+ * csio_ln_vnp_read - Issue an FCoE VNP read mailbox for the lnode.
+ * @ln: lnode
+ * @cbfn: Completion handler handed to the mailbox.
+ *
+ * Issued with lock held.
+ * Returns 0 once the mailbox has been issued, -ENOMEM if no mailbox could
+ * be allocated, or -EINVAL if issuing it failed.
+ */
+static int
+csio_ln_vnp_read(struct csio_lnode *ln,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_hw *hw = ln->hwp;
+	struct csio_mb *mb;
+
+	mb = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (mb == NULL) {
+		CSIO_INC_STATS(hw, n_err_nomem);
+		return -ENOMEM;
+	}
+
+	/* Build the VNP read command from this lnode's FCF/VNP flow ids */
+	csio_fcoe_vnp_read_init_mb(ln, mb, CSIO_MB_DEFAULT_TMO,
+				   ln->fcf_flowid, ln->vnp_flowid, cbfn);
+
+	if (csio_mb_issue(hw, mb) == 0)
+		return 0;
+
+	/* Issue failed: release the mailbox here */
+	csio_err(hw, "Failed to issue mbox FCoE VNP command\n");
+	mempool_free(mb, hw->mb_mempool);
+	return -EINVAL;
+}
+
+/*
+ * csio_fcoe_enable_link - Bring the FCoE link up or down.
+ * @ln: lnode
+ * @enable: true to bring the link up, false to bring it down
+ *
+ * Issued with lock held.
+ * Issues an FCOE LINK mailbox for the port associated with @ln and reads
+ * the firmware return value from that same mailbox. After a successful
+ * link up, the WWNN/WWPN from the response are stored in the lnode and the
+ * physical MAC is stored in every hw->pport[] entry whose portid matches.
+ * Returns 0 on success, -ENOMEM if no mailbox could be allocated, and
+ * -EINVAL if the mailbox could not be issued or firmware reported failure.
+ */
+static int
+csio_fcoe_enable_link(struct csio_lnode *ln, bool enable)
+{
+	struct csio_hw *hw = ln->hwp;
+	struct fw_fcoe_link_cmd *lcmd;
+	struct csio_mb *mbp;
+	enum fw_retval retval;
+	uint8_t portid;
+	uint8_t sub_op;
+	int rv = -EINVAL;
+	int i;
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		CSIO_INC_STATS(hw, n_err_nomem);
+		return -ENOMEM;
+	}
+
+	portid = ln->portid;
+	if (enable)
+		sub_op = FCOE_LINK_UP;
+	else
+		sub_op = FCOE_LINK_DOWN;
+
+	csio_dbg(hw, "bringing FCOE LINK %s on Port:%d\n",
+		 sub_op ? "UP" : "DOWN", portid);
+
+	csio_write_fcoe_link_cond_init_mb(ln, mbp, CSIO_MB_DEFAULT_TMO,
+					  portid, sub_op, 0, 0, 0, NULL);
+
+	if (csio_mb_issue(hw, mbp)) {
+		csio_err(hw, "failed to issue FCOE LINK cmd on port[%d]\n",
+			portid);
+		goto out;
+	}
+
+	retval = csio_mb_fw_retval(mbp);
+	if (retval != FW_SUCCESS) {
+		csio_err(hw,
+			 "FCOE LINK %s cmd on port[%d] failed with "
+			 "ret:x%x\n", sub_op ? "UP" : "DOWN", portid, retval);
+		goto out;
+	}
+
+	rv = 0;
+
+	/* Only a link-up response carries identity data worth saving */
+	if (enable) {
+		lcmd = (struct fw_fcoe_link_cmd *)mbp->mb;
+
+		memcpy(csio_ln_wwnn(ln), lcmd->vnport_wwnn, 8);
+		memcpy(csio_ln_wwpn(ln), lcmd->vnport_wwpn, 8);
+
+		for (i = 0; i < CSIO_MAX_PPORTS; i++) {
+			if (hw->pport[i].portid != portid)
+				continue;
+			memcpy(hw->pport[i].mac, lcmd->phy_mac, 6);
+		}
+	}
+
+out:
+	mempool_free(mbp, hw->mb_mempool);
+	return rv;
+}
+
+/*
+ * csio_ln_read_fcf_cbfn - Completion handler for the FCoE FCF read mailbox.
+ * @hw: HW module.
+ * @mbp: Completed mailbox; mbp->priv holds the lnode, mbp->mb the response.
+ *
+ * On success, copies the FCF attributes from the response into
+ * ln->fcfinfo while holding hw->lock. The mailbox is released back to
+ * hw->mb_mempool on both the success and the failure path.
+ */
+static void
+csio_ln_read_fcf_cbfn(struct csio_hw *hw, struct csio_mb *mbp)
+{
+	struct fw_fcoe_fcf_cmd *rsp = (struct fw_fcoe_fcf_cmd *)(mbp->mb);
+	struct csio_lnode *ln = (struct csio_lnode *)mbp->priv;
+	struct csio_fcf_info *fcf;
+	enum fw_retval retval;
+
+	spin_lock_irq(&hw->lock);
+
+	retval = FW_CMD_RETVAL_GET(ntohl(rsp->retval_len16));
+	if (retval != FW_SUCCESS) {
+		csio_ln_err(ln, "FCOE FCF cmd failed with ret x%x\n",
+				retval);
+		mempool_free(mbp, hw->mb_mempool);
+		spin_unlock_irq(&hw->lock);
+		return;
+	}
+
+	fcf = ln->fcfinfo;
+
+	/* Scalar attributes */
+	fcf->priority = FW_FCOE_FCF_CMD_PRIORITY_GET(
+					ntohs(rsp->priority_pkd));
+	fcf->vf_id = ntohs(rsp->vf_id);
+	fcf->vlan_id = rsp->vlan_id;
+	fcf->max_fcoe_size = ntohs(rsp->max_fcoe_size);
+	fcf->fka_adv = be32_to_cpu(rsp->fka_adv);
+	fcf->fcfi = FW_FCOE_FCF_CMD_FCFI_GET(ntohl(rsp->op_to_fcfi));
+
+	/* Fields packed into fpma_to_portid */
+	fcf->fpma = FW_FCOE_FCF_CMD_FPMA_GET(rsp->fpma_to_portid);
+	fcf->spma = FW_FCOE_FCF_CMD_SPMA_GET(rsp->fpma_to_portid);
+	fcf->login = FW_FCOE_FCF_CMD_LOGIN_GET(rsp->fpma_to_portid);
+	fcf->portid = FW_FCOE_FCF_CMD_PORTID_GET(rsp->fpma_to_portid);
+
+	/* Byte-array attributes, each sized by its destination field */
+	memcpy(fcf->fc_map, rsp->fc_map, sizeof(fcf->fc_map));
+	memcpy(fcf->mac, rsp->mac, sizeof(fcf->mac));
+	memcpy(fcf->name_id, rsp->name_id, sizeof(fcf->name_id));
+	memcpy(fcf->fabric, rsp->fabric, sizeof(fcf->fabric));
+	memcpy(fcf->spma_mac, rsp->spma_mac, sizeof(fcf->spma_mac));
+
+	spin_unlock_irq(&hw->lock);
+	mempool_free(mbp, hw->mb_mempool);
+}
+
+/*
+ * csio_ln_read_fcf_entry - Issue an FCoE FCF read mailbox for the lnode.
+ * @ln: lnode
+ * @cbfn: Completion handler handed to the mailbox, or NULL.
+ *
+ * Issued with lock held. When @cbfn is NULL the response is processed
+ * right here by calling csio_ln_read_fcf_cbfn() with the lock dropped
+ * around the call.
+ * Returns 0 once the mailbox has been issued, -ENOMEM if no mailbox could
+ * be allocated, or -EINVAL if issuing it failed.
+ */
+static int
+csio_ln_read_fcf_entry(struct csio_lnode *ln,
+			void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_hw *hw = ln->hwp;
+	struct csio_mb *mb;
+
+	mb = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (mb == NULL) {
+		CSIO_INC_STATS(hw, n_err_nomem);
+		return -ENOMEM;
+	}
+
+	/* Build the FCF read command for this port and FCF flow id */
+	csio_fcoe_read_fcf_init_mb(ln, mb, CSIO_MB_DEFAULT_TMO,
+				   ln->portid, ln->fcf_flowid, cbfn);
+
+	if (csio_mb_issue(hw, mb)) {
+		csio_err(hw, "failed to issue FCOE FCF cmd\n");
+		mempool_free(mb, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	if (cbfn != NULL)
+		return 0;
+
+	/* The completion handler acquires hw->lock itself */
+	spin_unlock_irq(&hw->lock);
+	csio_ln_read_fcf_cbfn(hw, mb);
+	spin_lock_irq(&hw->lock);
+
+	return 0;
+}
+
+/*
+ * csio_handle_link_up - Logical Linkup event.
+ * @hw - HW module.
+ * @portid - Physical port number
+ * @fcfi - FCF index.
+ * @vnpi - VNP index.
+ * Returns - none.
+ *
+ * This event is received from FW, when virtual link is established between
+ * Physical port[ENode] and FCF. If its new vnpi, then local node object is
+ * created on this FCF and set to [ONLINE] state.
+ * Lnode waits for FW_RDEV_CMD event to be received indicating that
+ * Fabric login is completed and lnode moves to [READY] state.
+ *
+ * This called with hw lock held. The lock is dropped temporarily around
+ * the allocation of a new lnode.
+ */
+static void
+csio_handle_link_up(struct csio_hw *hw, uint8_t portid, uint32_t fcfi,
+		    uint32_t vnpi)
+{
+	struct csio_lnode *ln = NULL;
+
+	/* Lookup lnode based on vnpi */
+	ln = csio_ln_lookup_by_vnpi(hw, vnpi);
+	if (!ln) {
+		/* Pick lnode based on portid */
+		ln = csio_ln_lookup_by_portid(hw, portid);
+		if (!ln) {
+			csio_err(hw, "failed to lookup fcoe lnode on port:%d\n",
+				portid);
+			CSIO_DB_ASSERT(0);
+			return;
+		}
+
+		/* Check if lnode has valid vnp flowid */
+		if (ln->vnp_flowid != CSIO_INVALID_IDX) {
+			/*
+			 * New VN-Port: the port's lnode already owns a vnpi,
+			 * so this link needs an lnode of its own. Keep the
+			 * allocation result; otherwise the failure check
+			 * below tests the old lnode and the old lnode's
+			 * vnpi is overwritten.
+			 */
+			spin_unlock_irq(&hw->lock);
+			ln = csio_lnode_alloc(hw);
+			spin_lock_irq(&hw->lock);
+			if (!ln) {
+				csio_err(hw,
+					 "failed to allocate fcoe lnode "
+					 "for port:%d vnpi:x%x\n",
+					 portid, vnpi);
+				CSIO_DB_ASSERT(0);
+				return;
+			}
+			ln->portid = portid;
+		}
+		ln->vnp_flowid = vnpi;
+		/* The low 16 bits of dev_num carry the vnpi */
+		ln->dev_num &= ~0xFFFF;
+		ln->dev_num |= vnpi;
+	}
+
+	/*Initialize fcfi */
+	ln->fcf_flowid = fcfi;
+
+	csio_info(hw, "Port:%d - FCOE LINK UP\n", portid);
+
+	CSIO_INC_STATS(ln, n_link_up);
+
+	/* Send LINKUP event to SM */
+	csio_post_event(&ln->sm, CSIO_LNE_LINKUP);
+}
+
+/*
+ * csio_post_event_rns - Post an rnode event to every rnode of an lnode.
+ * @ln - FCOE lnode
+ * @evt - Given rnode event
+ * Returns - none
+ *
+ * Walks the list anchored at ln->rnhead and posts @evt to the state
+ * machine of each rnode on it. This routine is invoked when lnode
+ * receives LINK_DOWN/DOWN_LINK/CLOSE event.
+ *
+ * This called with hw lock held
+ */
+static void
+csio_post_event_rns(struct csio_lnode *ln, enum csio_rn_ev evt)
+{
+	struct csio_rnode *head = (struct csio_rnode *) &ln->rnhead;
+	struct list_head *pos, *nxt;
+
+	/* Safe iteration: the next entry is fetched before posting */
+	list_for_each_safe(pos, nxt, &head->sm.sm_list)
+		csio_post_event(&((struct csio_rnode *) pos)->sm, evt);
+}
+
+/*
+ * csio_cleanup_rns - Release every rnode of an lnode.
+ * @ln - FCOE lnode
+ * Returns - none
+ *
+ * Walks the list anchored at ln->rnhead and calls csio_put_rnode() on
+ * each rnode found there.
+ *
+ * This called with hw lock held
+ */
+static void
+csio_cleanup_rns(struct csio_lnode *ln)
+{
+	struct csio_rnode *head = (struct csio_rnode *) &ln->rnhead;
+	struct list_head *pos, *nxt;
+
+	/* Safe iteration: the next entry is fetched before the put */
+	list_for_each_safe(pos, nxt, &head->sm.sm_list)
+		csio_put_rnode(ln, (struct csio_rnode *) pos);
+}
+
+/*
+ * csio_post_event_lns - Post an lnode event to an lnode and its children.
+ * @ln - FCOE lnode
+ * @evt - Given lnode event
+ * Returns - none
+ *
+ * An NPIV lnode receives the event alone. For any other lnode the event
+ * goes first to every lnode on its cln_head list and then to the lnode
+ * itself. This routine is invoked when lnode receives
+ * LINK_DOWN/DOWN_LINK/CLOSE event.
+ *
+ * This called with hw lock held
+ */
+static void
+csio_post_event_lns(struct csio_lnode *ln, enum csio_ln_ev evt)
+{
+	struct list_head *pos;
+
+	if (!csio_is_npiv_ln(ln)) {
+		/* Children first */
+		list_for_each(pos, &ln->cln_head)
+			csio_post_event(&((struct csio_lnode *) pos)->sm, evt);
+	}
+
+	/* Finally the lnode itself (the only target for an NPIV lnode) */
+	csio_post_event(&ln->sm, evt);
+}
+
+/*
+ * csio_ln_down - Local nport is down
+ * @ln - FCOE Lnode
+ * Returns - none
+ *
+ * Posts CSIO_LNE_LINK_DOWN through csio_post_event_lns(), i.e. to the
+ * Lnode and its associated NPIV lnodes.
+ *
+ * This called with hw lock held
+ */
+static void
+csio_ln_down(struct csio_lnode *ln)
+{
+	csio_post_event_lns(ln, CSIO_LNE_LINK_DOWN);
+}
+
+/*
+ * csio_handle_link_down - Logical Linkdown event.
+ * @hw - HW module.
+ * @portid - Physical port number
+ * @fcfi - FCF index.
+ * @vnpi - VNP index.
+ * Returns - none
+ *
+ * This event is received from FW, when virtual link goes down between
+ * Physical port[ENode] and FCF. The event is dropped (and counted in
+ * n_evt_drop) when no lnode owns @vnpi, when the lnode is not in ready
+ * state, or when @portid / @fcfi do not match what the lnode recorded.
+ * Otherwise LINK_DOWN is posted to the lnode and its NPIV lnodes.
+ *
+ * This called with hw lock held
+ */
+static void
+csio_handle_link_down(struct csio_hw *hw, uint8_t portid, uint32_t fcfi,
+		      uint32_t vnpi)
+{
+	struct csio_lnode *ln;
+	struct csio_fcf_info *fcf;
+
+	/* Lookup lnode based on vnpi */
+	ln = csio_ln_lookup_by_vnpi(hw, vnpi);
+	if (!ln) {
+		csio_warn(hw,
+			  "warn: FCOE linkdown recv with invalid vnpi x%x\n",
+			  vnpi);
+		CSIO_INC_STATS(hw, n_evt_drop);
+		return;
+	}
+
+	fcf = ln->fcfinfo;
+	CSIO_INC_STATS(ln, n_link_down);
+
+	/* A link that is not up cannot go down again */
+	if (!csio_is_lnode_ready(ln)) {
+		csio_ln_warn(ln,
+			"warn: FCOE link is already in offline "
+			"Ignoring Fcoe linkdown event on portid %d\n",
+			 portid);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		return;
+	}
+
+	/* The event must name the port recorded in the FCF info */
+	if (fcf->portid != portid) {
+		csio_ln_warn(ln,
+			"warn: FCOE linkdown recv with "
+			"invalid port %d\n", portid);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		return;
+	}
+
+	/* ... and the FCF this lnode is attached to */
+	if (ln->fcf_flowid != fcfi) {
+		csio_ln_warn(ln,
+			"warn: FCOE linkdown recv with "
+			"invalid fcfi x%x\n", fcfi);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		return;
+	}
+
+	csio_info(hw, "Port:%d - FCOE LINK DOWN\n", portid);
+
+	/* Send LINK_DOWN event to lnode s/m */
+	csio_ln_down(ln);
+}
+
+/*
+ * csio_is_lnode_ready - Checks FCOE lnode is in ready state.
+ * @ln: Lnode module
+ *
+ * Compares the lnode's current state-machine state with csio_lns_ready.
+ * Returns non-zero if FCOE lnode is in ready state, 0 otherwise.
+ */
+int
+csio_is_lnode_ready(struct csio_lnode *ln)
+{
+	return (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready));
+}
+
+/*****************************************************************************/
+/* START: Lnode SM                                                           */
+/*****************************************************************************/
+/*
+ * csio_lns_uninit - Lnode state machine handler for the "uninit" state.
+ * @ln - FCOE lnode.
+ * @evt - Event to be processed.
+ *
+ * Process the given lnode event which is currently in "uninit" state.
+ * LINKUP moves the lnode to "online" and kicks off the FCF read (physical
+ * lnodes only) and the VNP read; DOWN_LINK is ignored; anything else is
+ * counted as unexpected.
+ * Invoked with HW lock held.
+ * Return - none.
+ */
+static void
+csio_lns_uninit(struct csio_lnode *ln, enum csio_ln_ev evt)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_lnode *rln = hw->rln;
+	int rv;
+
+	CSIO_INC_STATS(ln, n_evt_sm[evt]);
+	switch (evt) {
+	case CSIO_LNE_LINKUP:
+		/* NOTE: the state stays "online" even if a read below fails */
+		csio_set_state(&ln->sm, csio_lns_online);
+		/* Read FCF only for physical lnode */
+		if (csio_is_phys_ln(ln)) {
+			rv = csio_ln_read_fcf_entry(ln,
+					csio_ln_read_fcf_cbfn);
+			if (rv != 0) {
+				/* TODO: Send HW RESET event */
+				CSIO_INC_STATS(ln, n_err);
+				break;
+			}
+
+			/* Add FCF record */
+			list_add_tail(&ln->fcfinfo->list, &rln->fcf_lsthead);
+		}
+
+		rv = csio_ln_vnp_read(ln, csio_ln_vnp_read_cbfn);
+		if (rv != 0) {
+			/* TODO: Send HW RESET event */
+			CSIO_INC_STATS(ln, n_err);
+		}
+		break;
+
+	case CSIO_LNE_DOWN_LINK:
+		/* Nothing to tear down in this state */
+		break;
+
+	default:
+		csio_ln_dbg(ln,
+			    "unexp ln event %d recv from did:x%x in "
+			    "ln state[uninit].\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_unexp);
+		break;
+	} /* switch event */
+}
+
+/*
+ * csio_lns_online - Lnode state machine handler for the "online" state.
+ * @ln - FCOE lnode.
+ * @evt - Event to be processed.
+ *
+ * Process the given lnode event which is currently in "online" state.
+ * FAB_INIT_DONE moves the lnode to "ready" and reports CSIO_LN_FC_LINKUP;
+ * LINK_DOWN/DOWN_LINK move it back to "uninit"; a repeated LINKUP is
+ * dropped; anything else is counted as unexpected.
+ * Invoked with HW lock held. The lock is dropped around the async event
+ * notification.
+ * Return - none.
+ */
+static void
+csio_lns_online(struct csio_lnode *ln, enum csio_ln_ev evt)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	CSIO_INC_STATS(ln, n_evt_sm[evt]);
+	switch (evt) {
+	case CSIO_LNE_LINKUP:
+		csio_ln_warn(ln,
+			     "warn: FCOE link is up already "
+			     "Ignoring linkup on port:%d\n", ln->portid);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		break;
+
+	case CSIO_LNE_FAB_INIT_DONE:
+		csio_set_state(&ln->sm, csio_lns_ready);
+
+		spin_unlock_irq(&hw->lock);
+		csio_lnode_async_event(ln, CSIO_LN_FC_LINKUP);
+		spin_lock_irq(&hw->lock);
+
+		break;
+
+	case CSIO_LNE_LINK_DOWN:
+		/* Fall through */
+	case CSIO_LNE_DOWN_LINK:
+		csio_set_state(&ln->sm, csio_lns_uninit);
+		if (csio_is_phys_ln(ln)) {
+			/* Remove FCF entry */
+			list_del_init(&ln->fcfinfo->list);
+		}
+		break;
+
+	default:
+		/* Name the state this handler actually serves */
+		csio_ln_dbg(ln,
+			    "unexp ln event %d recv from did:x%x in "
+			    "ln state[online].\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_unexp);
+
+		break;
+	} /* switch event */
+}
+
+/*
+ * csio_lns_ready - Lnode state machine handler for the "ready" state.
+ * @ln - FCOE lnode.
+ * @evt - Event to be processed.
+ *
+ * Process the given lnode event which is currently in "ready" state.
+ * LINK_DOWN/DOWN_LINK move the lnode to "offline", post RNFE_DOWN to its
+ * rnodes and report CSIO_LN_FC_LINKDOWN; LOGO moves it to "offline" and
+ * posts RNFE_DOWN; CLOSE moves it to "uninit" and posts RNFE_CLOSE; a
+ * repeated FAB_INIT_DONE is dropped; anything else is unexpected.
+ * Invoked with HW lock held. The lock is dropped around the async event
+ * notification.
+ * Return - none.
+ */
+static void
+csio_lns_ready(struct csio_lnode *ln, enum csio_ln_ev evt)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	CSIO_INC_STATS(ln, n_evt_sm[evt]);
+	switch (evt) {
+	case CSIO_LNE_FAB_INIT_DONE:
+		csio_ln_err(ln,
+			    "ignoring event %d recv from did x%x "
+			    "in ln state[ready].\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		break;
+
+	case CSIO_LNE_LINK_DOWN:
+		/* Fall through: both link-down flavours are handled alike */
+	case CSIO_LNE_DOWN_LINK:
+		csio_set_state(&ln->sm, csio_lns_offline);
+		csio_post_event_rns(ln, CSIO_RNFE_DOWN);
+
+		/* Host need to issue aborts in case if FW has not returned
+		 * WRs with status "ABORTED"
+		 */
+		spin_unlock_irq(&hw->lock);
+		csio_lnode_async_event(ln, CSIO_LN_FC_LINKDOWN);
+		spin_lock_irq(&hw->lock);
+
+		if (csio_is_phys_ln(ln)) {
+			/* Remove FCF entry */
+			list_del_init(&ln->fcfinfo->list);
+		}
+		break;
+
+	case CSIO_LNE_CLOSE:
+		csio_set_state(&ln->sm, csio_lns_uninit);
+		csio_post_event_rns(ln, CSIO_RNFE_CLOSE);
+		break;
+
+	case CSIO_LNE_LOGO:
+		csio_set_state(&ln->sm, csio_lns_offline);
+		csio_post_event_rns(ln, CSIO_RNFE_DOWN);
+		break;
+
+	default:
+		/* Name the state this handler actually serves */
+		csio_ln_dbg(ln,
+			    "unexp ln event %d recv from did:x%x in "
+			    "ln state[ready].\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_unexp);
+		CSIO_DB_ASSERT(0);
+		break;
+	} /* switch event */
+}
+
+/*
+ * csio_lns_offline - Lnode state machine handler for the "offline" state.
+ * @ln - FCOE lnode.
+ * @evt - Event to be processed.
+ *
+ * Process the given lnode event which is currently in "offline" state.
+ * LINKUP moves the lnode to "online" and kicks off the FCF read (physical
+ * lnodes only) and the VNP read; CLOSE moves it to "uninit" and posts
+ * RNFE_CLOSE to its rnodes; LINK_DOWN/DOWN_LINK/LOGO are dropped;
+ * anything else is unexpected.
+ * Invoked with HW lock held.
+ * Return - none.
+ */
+static void
+csio_lns_offline(struct csio_lnode *ln, enum csio_ln_ev evt)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_lnode *rln = hw->rln;
+	int rv;
+
+	CSIO_INC_STATS(ln, n_evt_sm[evt]);
+	switch (evt) {
+	case CSIO_LNE_LINKUP:
+		csio_set_state(&ln->sm, csio_lns_online);
+		/* Read FCF only for physical lnode */
+		if (csio_is_phys_ln(ln)) {
+			rv = csio_ln_read_fcf_entry(ln,
+					csio_ln_read_fcf_cbfn);
+			if (rv != 0) {
+				/* TODO: Send HW RESET event */
+				CSIO_INC_STATS(ln, n_err);
+				break;
+			}
+
+			/* Add FCF record */
+			list_add_tail(&ln->fcfinfo->list, &rln->fcf_lsthead);
+		}
+
+		rv = csio_ln_vnp_read(ln, csio_ln_vnp_read_cbfn);
+		if (rv != 0) {
+			/* TODO: Send HW RESET event */
+			CSIO_INC_STATS(ln, n_err);
+		}
+		break;
+
+	case CSIO_LNE_LINK_DOWN:
+	case CSIO_LNE_DOWN_LINK:
+	case CSIO_LNE_LOGO:
+		/* Already offline: nothing left to take down */
+		csio_ln_err(ln,
+			    "ignoring event %d recv from did x%x "
+			    "in ln state[offline].\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_drop);
+		break;
+
+	case CSIO_LNE_CLOSE:
+		csio_set_state(&ln->sm, csio_lns_uninit);
+		csio_post_event_rns(ln, CSIO_RNFE_CLOSE);
+		break;
+
+	default:
+		csio_ln_dbg(ln,
+			    "unexp ln event %d recv from did:x%x in "
+			    "ln state[offline]\n", evt, ln->nport_id);
+		CSIO_INC_STATS(ln, n_evt_unexp);
+		CSIO_DB_ASSERT(0);
+		break;
+	} /* switch event */
+}
+
+/*****************************************************************************/
+/* END: Lnode SM                                                             */
+/*****************************************************************************/
+
+/*
+ * csio_free_fcfinfo - kref release callback for a csio_fcf_info.
+ * @kref: kref embedded in the csio_fcf_info to free.
+ *
+ * Recovers the enclosing csio_fcf_info from @kref and frees it.
+ */
+static void
+csio_free_fcfinfo(struct kref *kref)
+{
+	struct csio_fcf_info *fcfinfo = container_of(kref,
+						struct csio_fcf_info, kref);
+	kfree(fcfinfo);
+}
+
+/* Helper routines for attributes  */
+/*
+ * csio_lnode_state_to_str - Get current state of FCOE lnode.
+ * @ln - lnode
+ * @str - state of lnode.
+ *
+ * Writes the NUL-terminated state name into @str: "UNINIT", "ONLINE",
+ * "READY", "OFFLINE", or "UNKNOWN" if the state matches none of the lnode
+ * state handlers. The caller must provide room for at least 8 bytes
+ * (the longest names plus the terminator).
+ */
+void
+csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str)
+{
+	if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_uninit)) {
+		strcpy(str, "UNINIT");
+		return;
+	}
+	/* "online" is a real SM state; do not report it as UNKNOWN */
+	if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_online)) {
+		strcpy(str, "ONLINE");
+		return;
+	}
+	if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_ready)) {
+		strcpy(str, "READY");
+		return;
+	}
+	if (csio_get_state(ln) == ((csio_sm_state_t)csio_lns_offline)) {
+		strcpy(str, "OFFLINE");
+		return;
+	}
+	strcpy(str, "UNKNOWN");
+} /* csio_lnode_state_to_str */
+
+
+/*
+ * csio_get_phy_port_stats - Read the FCoE statistics of a physical port.
+ * @hw: HW module
+ * @portid: Physical port number
+ * @port_stats: Filled in from the mailbox responses.
+ *
+ * Issues three port-params mailboxes on one reused mailbox buffer: the
+ * first two fetch 6 statistics each (starting at index 1 and 7), the last
+ * fetches 4 (starting at index 13). The firmware return value handed back
+ * by csio_mb_process_portparams_rsp() is not examined here.
+ * Returns 0 on success, -EINVAL if the mailbox could not be allocated or
+ * issued.
+ */
+int
+csio_get_phy_port_stats(struct csio_hw *hw, uint8_t portid,
+			struct fw_fcoe_port_stats *port_stats)
+{
+	struct fw_fcoe_port_cmd_params portparams;
+	struct csio_mb *mbp;
+	enum fw_retval retval;
+	int grp;
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (mbp == NULL) {
+		csio_err(hw, "FCoE FCF PARAMS command out of memory!\n");
+		return -EINVAL;
+	}
+	portparams.portid = portid;
+
+	for (grp = 0; grp < 3; grp++) {
+		/* Groups of 6 statistics, 1-based; the last group holds 4 */
+		portparams.idx = grp*6 + 1;
+		portparams.nstats = (grp == 2) ? 4 : 6;
+
+		csio_fcoe_read_portparams_init_mb(hw, mbp, CSIO_MB_DEFAULT_TMO,
+							&portparams, NULL);
+		if (csio_mb_issue(hw, mbp)) {
+			csio_err(hw, "Issue of FCoE port params failed!\n");
+			mempool_free(mbp, hw->mb_mempool);
+			return -EINVAL;
+		}
+		csio_mb_process_portparams_rsp(hw, mbp, &retval,
+						&portparams, port_stats);
+	}
+
+	mempool_free(mbp, hw->mb_mempool);
+	return 0;
+}
+
+/*
+ * csio_ln_mgmt_wr_handler -Mgmt Work Request handler.
+ * @hw - HW module.
+ * @wr - WR.
+ * @len - WR len.
+ * This handler is invoked when an outstanding mgmt WR is completed.
+ * Its invoked in the context of FW event worker thread for every
+ * mgmt event received.
+ *
+ * The io request is recovered from the WR cookie and is only touched
+ * after it has been found on the mgmt active queue. It is then removed
+ * from that queue and its completion callback, if any, is run without
+ * hw->lock held.
+ * Return - none.
+ */
+
+static void
+csio_ln_mgmt_wr_handler(struct csio_hw *hw, void *wr, uint32_t len)
+{
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	struct csio_ioreq *io_req = NULL;
+	struct fw_fcoe_els_ct_wr *wr_cmd;
+
+
+	wr_cmd = (struct fw_fcoe_els_ct_wr *) wr;
+
+	if (len < sizeof(struct fw_fcoe_els_ct_wr)) {
+		csio_err(mgmtm->hw,
+			 "Invalid ELS CT WR length recvd, len:%x\n", len);
+		mgmtm->stats.n_err++;
+		return;
+	}
+
+	io_req = (struct csio_ioreq *) ((uintptr_t) wr_cmd->cookie);
+
+	/* lookup ioreq exists in our active Q */
+	spin_lock_irq(&hw->lock);
+	if (csio_mgmt_req_lookup(mgmtm, io_req) != 0) {
+		csio_err(mgmtm->hw,
+			"Error- Invalid IO handle recv in WR. handle: %p\n",
+			io_req);
+		mgmtm->stats.n_err++;
+		spin_unlock_irq(&hw->lock);
+		return;
+	}
+
+	/*
+	 * Record the status only now: before the lookup succeeded the
+	 * cookie was an unvalidated pointer and must not be written through.
+	 */
+	io_req->wr_status = csio_wr_status(wr_cmd);
+
+	/* Dequeue from active queue */
+	list_del_init(&io_req->sm.sm_list);
+	mgmtm->stats.n_active--;
+	spin_unlock_irq(&hw->lock);
+
+	/* io_req will be freed by completion handler */
+	if (io_req->io_cbfn)
+		io_req->io_cbfn(hw, io_req);
+}
+
+/**
+ * csio_fcoe_fwevt_handler - Event handler for Firmware FCoE events.
+ * @hw:		HW module
+ * @cpl_op:	CPL opcode
+ * @cmd:	FW cmd/WR.
+ *
+ * Process received FCoE cmd/WR event from FW. Three kinds of event are
+ * handled:
+ *  - CPL_FW6_MSG carrying FW_FCOE_LINK_CMD: logical link up/down.
+ *  - CPL_FW6_PLD carrying FW_RDEV_WR: remote device event, translated to an
+ *    lnode SM event and handed to the rnode SM.
+ *  - CPL_FW6_MSG carrying FW_FCOE_ELS_CT_WR: mgmt (ELS/CT) WR completion.
+ * Anything else is logged and counted in n_cpl_unexp. hw->lock is taken
+ * here around the link and rdev handling.
+ */
+void
+csio_fcoe_fwevt_handler(struct csio_hw *hw, __u8 cpl_op, __be64 *cmd)
+{
+	struct csio_lnode *ln;
+	struct csio_rnode *rn;
+	uint8_t portid, opcode = *(uint8_t *)cmd;
+	struct fw_fcoe_link_cmd *lcmd;
+	struct fw_wr_hdr *wr;
+	struct fw_rdev_wr *rdev_wr;
+	enum fw_fcoe_link_status lstatus;
+	uint32_t fcfi, rdev_flowid, vnpi;
+	enum csio_ln_ev evt;
+
+	if (cpl_op == CPL_FW6_MSG && opcode == FW_FCOE_LINK_CMD) {
+
+		lcmd = (struct fw_fcoe_link_cmd *)cmd;
+		lstatus = lcmd->lstatus;
+		portid = FW_FCOE_LINK_CMD_PORTID_GET(
+					ntohl(lcmd->op_to_portid));
+		fcfi = FW_FCOE_LINK_CMD_FCFI_GET(ntohl(lcmd->sub_opcode_fcfi));
+		vnpi = FW_FCOE_LINK_CMD_VNPI_GET(ntohl(lcmd->vnpi_pkd));
+
+		if (lstatus == FCOE_LINKUP) {
+
+			/* HW lock here */
+			spin_lock_irq(&hw->lock);
+			csio_handle_link_up(hw, portid, fcfi, vnpi);
+			spin_unlock_irq(&hw->lock);
+			/* HW un lock here */
+
+		} else if (lstatus == FCOE_LINKDOWN) {
+
+			/* HW lock here */
+			spin_lock_irq(&hw->lock);
+			csio_handle_link_down(hw, portid, fcfi, vnpi);
+			spin_unlock_irq(&hw->lock);
+			/* HW un lock here */
+		} else {
+			/*
+			 * Print the same unconverted value that was just
+			 * compared against FCOE_LINKUP/FCOE_LINKDOWN.
+			 */
+			csio_warn(hw, "Unexpected FCOE LINK status:0x%x\n",
+				    lstatus);
+			CSIO_INC_STATS(hw, n_cpl_unexp);
+		}
+	} else if (cpl_op == CPL_FW6_PLD) {
+		/* The WR header sits four 64-bit words into the message */
+		wr = (struct fw_wr_hdr *) (cmd + 4);
+		if (FW_WR_OP_GET(be32_to_cpu(wr->hi))
+			== FW_RDEV_WR) {
+
+			rdev_wr = (struct fw_rdev_wr *) (cmd + 4);
+
+			rdev_flowid = FW_RDEV_WR_FLOWID_GET(
+					ntohl(rdev_wr->alloc_to_len16));
+			vnpi = FW_RDEV_WR_ASSOC_FLOWID_GET(
+				    ntohl(rdev_wr->flags_to_assoc_flowid));
+
+			csio_dbg(hw,
+				"FW_RDEV_WR: flowid:x%x ev_cause:x%x "
+				"vnpi:0x%x\n", rdev_flowid,
+				rdev_wr->event_cause, vnpi);
+
+			if (rdev_wr->protocol != PROT_FCOE) {
+				csio_err(hw,
+					"FW_RDEV_WR: invalid proto:x%x "
+					"received with flowid:x%x\n",
+					rdev_wr->protocol,
+					rdev_flowid);
+				CSIO_INC_STATS(hw, n_evt_drop);
+				return;
+			}
+
+			/* HW lock here */
+			spin_lock_irq(&hw->lock);
+			ln = csio_ln_lookup_by_vnpi(hw, vnpi);
+			if (!ln) {
+				csio_err(hw,
+					"FW_RDEV_WR: invalid vnpi:x%x received "
+					"with flowid:x%x\n", vnpi, rdev_flowid);
+				CSIO_INC_STATS(hw, n_evt_drop);
+				spin_unlock_irq(&hw->lock);
+				return;
+			}
+
+			rn = csio_confirm_rnode(ln, rdev_flowid,
+					&rdev_wr->u.fcoe_rdev);
+			if (!rn) {
+				csio_ln_dbg(ln,
+					"Failed to confirm rnode "
+					"for flowid:x%x\n", rdev_flowid);
+				CSIO_INC_STATS(hw, n_evt_drop);
+				spin_unlock_irq(&hw->lock);
+				return;
+			}
+
+			/* save previous event for debugging */
+			ln->prev_evt = ln->cur_evt;
+			ln->cur_evt = rdev_wr->event_cause;
+			CSIO_INC_STATS(ln, n_evt_fw[rdev_wr->event_cause]);
+
+			/* Translate all the fabric events to lnode SM events */
+			evt = CSIO_FWE_TO_LNE(rdev_wr->event_cause);
+			if (evt) {
+				csio_ln_dbg(ln,
+					"Posting event to lnode event:%d "
+					"cause:%d flowid:x%x\n", evt,
+					rdev_wr->event_cause, rdev_flowid);
+				csio_post_event(&ln->sm, evt);
+			}
+
+			/* Handover event to rn SM here. */
+			csio_rnode_fwevt_handler(rn, rdev_wr->event_cause);
+
+			spin_unlock_irq(&hw->lock);
+		} else {
+			csio_warn(hw, "unexpected WR op(0x%x) recv\n",
+				FW_WR_OP_GET(be32_to_cpu((wr->hi))));
+			CSIO_INC_STATS(hw, n_cpl_unexp);
+		}
+	} else if (cpl_op == CPL_FW6_MSG) {
+		wr = (struct fw_wr_hdr *) (cmd);
+		if (FW_WR_OP_GET(be32_to_cpu(wr->hi)) == FW_FCOE_ELS_CT_WR) {
+			csio_ln_mgmt_wr_handler(hw, wr,
+					sizeof(struct fw_fcoe_els_ct_wr));
+		} else {
+			csio_warn(hw, "unexpected WR op(0x%x) recv\n",
+				FW_WR_OP_GET(be32_to_cpu((wr->hi))));
+			CSIO_INC_STATS(hw, n_cpl_unexp);
+		}
+	} else {
+		/* Report the CPL opcode the message is about */
+		csio_warn(hw, "unexpected CPL op(0x%x) recv\n", cpl_op);
+		CSIO_INC_STATS(hw, n_cpl_unexp);
+	}
+}
+
+/**
+ * csio_lnode_start - Kickstart lnode discovery.
+ * @ln:		lnode
+ *
+ * This routine kickstarts the discovery by issuing an FCOE_LINK (up) command
+ * for a physical lnode whose link has not been enabled yet. For any other
+ * lnode it does nothing. CSIO_LNF_LINK_ENABLE is set once the command has
+ * been attempted, whatever its result.
+ *
+ * Returns 0 if nothing had to be done, otherwise the result of
+ * csio_fcoe_enable_link().
+ */
+int
+csio_lnode_start(struct csio_lnode *ln)
+{
+	int rv;
+
+	if (!csio_is_phys_ln(ln) || (ln->flags & CSIO_LNF_LINK_ENABLE))
+		return 0;
+
+	rv = csio_fcoe_enable_link(ln, 1);
+	ln->flags |= CSIO_LNF_LINK_ENABLE;
+
+	return rv;
+}
+
+/**
+ * csio_lnode_stop - Stop the lnode.
+ * @ln:		lnode
+ *
+ * This routine is invoked by HW module to stop lnode and its associated NPIV
+ * lnodes. DOWN_LINK is posted to the lnode(s) first; a physical lnode whose
+ * link is enabled then gets an FCOE_LINK (down) command and has
+ * CSIO_LNF_LINK_ENABLE cleared. The result of that command is not checked.
+ */
+void
+csio_lnode_stop(struct csio_lnode *ln)
+{
+	csio_post_event_lns(ln, CSIO_LNE_DOWN_LINK);
+	if (csio_is_phys_ln(ln) && (ln->flags & CSIO_LNF_LINK_ENABLE)) {
+		csio_fcoe_enable_link(ln, 0);
+		ln->flags &= ~CSIO_LNF_LINK_ENABLE;
+	}
+	csio_ln_dbg(ln, "stopping ln :%p\n", ln);
+}
+
+/**
+ * csio_lnode_close - Close an lnode.
+ * @ln:		lnode
+ *
+ * This routine is invoked by HW module to close an lnode and its
+ * associated NPIV lnodes. Lnode and its associated NPIV lnodes are
+ * set to uninitialized state by posting CSIO_LNE_CLOSE to them. A physical
+ * lnode additionally has its vnp_flowid reset to CSIO_INVALID_IDX.
+ */
+void
+csio_lnode_close(struct csio_lnode *ln)
+{
+	csio_post_event_lns(ln, CSIO_LNE_CLOSE);
+	if (csio_is_phys_ln(ln))
+		ln->vnp_flowid = CSIO_INVALID_IDX;
+
+	csio_ln_dbg(ln, "closed ln :%p\n", ln);
+}
+
+/*
+ * csio_ln_prep_ecwr - Prepare ELS/CT WR.
+ * @io_req - IO request.
+ * @wr_len - WR len in bytes; stored in the WR in units of 16 bytes.
+ * @immd_len - WR immediate data
+ * @sub_op - Sub opcode
+ * @sid - source portid.
+ * @did - destination portid
+ * @flow_id - flowid
+ * @fw_wr - ELS/CT WR to be prepared.
+ *
+ * Fills the ELS/CT WR header in @fw_wr. The response SGL is pointed at
+ * the DMA buffer of @io_req.
+ * Returns: 0 - on success
+ */
+static int
+csio_ln_prep_ecwr(struct csio_ioreq *io_req, uint32_t wr_len,
+		      uint32_t immd_len, uint8_t sub_op, uint32_t sid,
+		      uint32_t did, uint32_t flow_id, uint8_t *fw_wr)
+{
+	struct fw_fcoe_els_ct_wr *ecwr = (struct fw_fcoe_els_ct_wr *)fw_wr;
+	uint32_t be_id;
+
+	/* Opcode, immediate length, flow id and WR length */
+	ecwr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_FCOE_ELS_CT_WR) |
+				       FW_FCOE_ELS_CT_WR_IMMDLEN(immd_len));
+	wr_len = DIV_ROUND_UP(wr_len, 16);
+	ecwr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(flow_id) |
+					 FW_WR_LEN16(wr_len));
+
+	/* Request attributes */
+	ecwr->els_ct_type = sub_op;
+	ecwr->ctl_pri = 0;
+	ecwr->cp_en_class = 0;
+	ecwr->cookie = io_req->fw_handle;
+	ecwr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(
+			io_req->lnode->hwp, io_req->iq_idx));
+	ecwr->fl_to_sp = FW_FCOE_ELS_CT_WR_SP(1);
+	ecwr->tmo_val = (uint8_t) io_req->tmo;
+
+	/* Source and destination port ids, 3 bytes each */
+	be_id = htonl(sid);
+	memcpy(ecwr->l_id, PORT_ID_PTR(be_id), 3);
+	be_id = htonl(did);
+	memcpy(ecwr->r_id, PORT_ID_PTR(be_id), 3);
+
+	/* Prepare RSP SGL */
+	ecwr->rsp_dmalen = cpu_to_be32(io_req->dma_buf.len);
+	ecwr->rsp_dmaaddr = cpu_to_be64(io_req->dma_buf.paddr);
+
+	return 0;
+}
+
+/*
+ * csio_ln_mgmt_submit_wr - Post elsct work request.
+ * @mgmtm - mgmtm
+ * @io_req - io request.
+ * @sub_op - ELS or CT request type
+ * @pld - Dma Payload buffer
+ * @pld_len - Payload len
+ *
+ * Prepares ELSCT Work request and sends it to FW. A payload shorter than
+ * 256 bytes is copied into the WR as immediate data; otherwise a single
+ * entry DSGL pointing at @pld is appended instead. Note that a zero
+ * @pld_len yields im_len == 0 and therefore also takes the DSGL path.
+ * Returns: 0 - on success, otherwise the error from csio_wr_get().
+ */
+static int
+csio_ln_mgmt_submit_wr(struct csio_mgmtm *mgmtm, struct csio_ioreq *io_req,
+		uint8_t sub_op, struct csio_dma_buf *pld,
+		uint32_t pld_len)
+{
+	struct csio_wr_pair wrp;
+	struct csio_lnode *ln = io_req->lnode;
+	struct csio_rnode *rn = io_req->rnode;
+	struct	csio_hw	*hw = mgmtm->hw;
+	/* Staging buffer for the WR header; assumes the header fits in
+	 * 64 bytes - TODO confirm against struct fw_fcoe_els_ct_wr
+	 */
+	uint8_t fw_wr[64];
+	struct ulptx_sgl dsgl;
+	uint32_t wr_size = 0;
+	uint8_t im_len = 0;
+	uint32_t wr_off = 0;
+
+	int ret = 0;
+
+	/* Calculate WR Size for this ELS REQ */
+	wr_size = sizeof(struct fw_fcoe_els_ct_wr);
+
+	/* Send as immediate data if pld < 256 */
+	if (pld_len < 256) {
+		wr_size += ALIGN(pld_len, 8);
+		im_len = (uint8_t)pld_len;
+	} else
+		wr_size += sizeof(struct ulptx_sgl);
+
+	/* Roundup WR size in units of 16 bytes */
+	wr_size = ALIGN(wr_size, 16);
+
+	/* Get WR to send ELS REQ */
+	ret = csio_wr_get(hw, mgmtm->eq_idx, wr_size, &wrp);
+	if (ret != 0) {
+		csio_err(hw, "Failed to get WR for ec_req %p ret:%d\n",
+			io_req, ret);
+		return ret;
+	}
+
+	/* Prepare Generic WR used by all ELS/CT cmd */
+	csio_ln_prep_ecwr(io_req, wr_size, im_len, sub_op,
+				ln->nport_id, rn->nport_id,
+				csio_rn_flowid(rn),
+				&fw_wr[0]);
+
+	/* Copy ELS/CT WR CMD */
+	csio_wr_copy_to_wrp(&fw_wr[0], &wrp, wr_off,
+			sizeof(struct fw_fcoe_els_ct_wr));
+	wr_off += sizeof(struct fw_fcoe_els_ct_wr);
+
+	/* Copy payload to Immediate section of WR */
+	if (im_len)
+		csio_wr_copy_to_wrp(pld->vaddr, &wrp, wr_off, im_len);
+	else {
+		/* Program DSGL to dma payload */
+		dsgl.cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) |
+					ULPTX_MORE | ULPTX_NSGE(1));
+		dsgl.len0 = cpu_to_be32(pld_len);
+		dsgl.addr0 = cpu_to_be64(pld->paddr);
+		/* The DSGL is placed at the next 8-byte aligned offset */
+		csio_wr_copy_to_wrp(&dsgl, &wrp, ALIGN(wr_off, 8),
+				   sizeof(struct ulptx_sgl));
+	}
+
+	/* Issue work request to xmit ELS/CT req to FW */
+	csio_wr_issue(mgmtm->hw, mgmtm->eq_idx, false);
+	return ret;
+}
+
+/*
+ * csio_ln_mgmt_submit_req - Submit FCOE Mgmt request.
+ * @io_req - IO Request
+ * @io_cbfn - Completion handler.
+ * @req_type - ELS or CT request type
+ * @pld - Dma Payload buffer
+ * @pld_len - Payload len
+ *
+ * This API is used to submit a management ELS/CT request. The request
+ * carries its own address as the firmware handle and is queued on the
+ * mgmt active queue once its WR has been posted.
+ * This called with hw lock held
+ * Returns: 0 - on success
+ *	    otherwise the error returned by csio_ln_mgmt_submit_wr().
+ */
+static int
+csio_ln_mgmt_submit_req(struct csio_ioreq *io_req,
+		void (*io_cbfn) (struct csio_hw *, struct csio_ioreq *),
+		enum fcoe_cmn_type req_type, struct csio_dma_buf *pld,
+		uint32_t pld_len)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(io_req->lnode);
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	int err;
+
+	/* Upper layer callback handler and the handle FW echoes back */
+	io_req->io_cbfn = io_cbfn;
+	io_req->fw_handle = (uintptr_t) (io_req);
+
+	/* Mgmt requests always use the mgmt module's queues */
+	io_req->eq_idx = mgmtm->eq_idx;
+	io_req->iq_idx = mgmtm->iq_idx;
+
+	err = csio_ln_mgmt_submit_wr(mgmtm, io_req, req_type, pld, pld_len);
+	if (err != 0)
+		return err;
+
+	/* Track the request until its completion arrives */
+	list_add_tail(&io_req->sm.sm_list, &mgmtm->active_q);
+	mgmtm->stats.n_active++;
+
+	return err;
+}
+
+/*
+ * csio_ln_fdmi_init - FDMI Init entry point.
+ * @ln: lnode
+ *
+ * Allocates the mgmt request used for FDMI together with a 2048 byte DMA
+ * buffer for its payload, and sets CSIO_LNF_FDMI_ENABLE once both exist.
+ * Returns 0 on success, -ENOMEM if either allocation fails (in which case
+ * ln->mgmt_req is left NULL).
+ */
+static int
+csio_ln_fdmi_init(struct csio_lnode *ln)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_dma_buf *buf;
+
+	/* Allocate MGMT request required for FDMI */
+	ln->mgmt_req = kzalloc(sizeof(struct csio_ioreq), GFP_KERNEL);
+	if (ln->mgmt_req == NULL) {
+		csio_ln_err(ln, "Failed to alloc ioreq for FDMI\n");
+		CSIO_INC_STATS(hw, n_err_nomem);
+		return -ENOMEM;
+	}
+
+	/* Allocate Dma buffers for FDMI response Payload */
+	buf = &ln->mgmt_req->dma_buf;
+	buf->len = 2048;
+	buf->vaddr = pci_alloc_consistent(hw->pdev, buf->len, &buf->paddr);
+	if (buf->vaddr == NULL)
+		goto err_free_req;
+
+	ln->flags |= CSIO_LNF_FDMI_ENABLE;
+	return 0;
+
+err_free_req:
+	csio_err(hw, "Failed to alloc DMA buffer for FDMI!\n");
+	kfree(ln->mgmt_req);
+	ln->mgmt_req = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * csio_ln_fdmi_exit - FDMI exit entry point.
+ * @ln: lnode
+ *
+ * Releases the FDMI response DMA buffer (if one was allocated) and the
+ * management ioreq that csio_ln_fdmi_init() attached to the lnode.
+ * Does nothing when no management request is attached.
+ *
+ * Returns: always 0.
+ */
+static int
+csio_ln_fdmi_exit(struct csio_lnode *ln)
+{
+	struct csio_dma_buf *dma_buf;
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	if (!ln->mgmt_req)
+		return 0;
+
+	dma_buf = &ln->mgmt_req->dma_buf;
+	if (dma_buf->vaddr)
+		pci_free_consistent(hw->pdev, dma_buf->len, dma_buf->vaddr,
+				    dma_buf->paddr);
+
+	kfree(ln->mgmt_req);
+	/*
+	 * Clear the pointer so the guard at the top stays meaningful and a
+	 * repeated call cannot touch or free the released request again.
+	 */
+	ln->mgmt_req = NULL;
+	return 0;
+}
+
+/*
+ * csio_scan_done - Decide whether target scanning on an lnode has settled.
+ * @ln: lnode
+ * @ticks: current tick value
+ * @time: elapsed scan time, compared against @max_scan_ticks
+ * @max_scan_ticks: upper bound after which the scan is declared done
+ * @delta_scan_ticks: sampling interval for the target count
+ *
+ * Every @delta_scan_ticks the number of SCSI targets is sampled; the scan
+ * is considered done once two consecutive non-zero samples are equal, or
+ * once @time reaches @max_scan_ticks.
+ *
+ * Returns: 1 when the scan is done, 0 otherwise.
+ */
+int
+csio_scan_done(struct csio_lnode *ln, unsigned long ticks,
+		unsigned long time, unsigned long max_scan_ticks,
+		unsigned long delta_scan_ticks)
+{
+	/* Hard limit reached */
+	if (time >= max_scan_ticks)
+		return 1;
+
+	/* First call: start the sampling window now */
+	if (!ln->tgt_scan_tick)
+		ln->tgt_scan_tick = ticks;
+
+	/* Still inside the current sampling window */
+	if ((ticks - ln->tgt_scan_tick) < delta_scan_ticks)
+		return 0;
+
+	/* Target count unchanged since the previous sample: settled */
+	if (ln->last_scan_ntgts && ln->last_scan_ntgts == ln->n_scsi_tgts)
+		return 1;
+
+	/* Record this sample and open a new window */
+	ln->last_scan_ntgts = ln->n_scsi_tgts;
+	ln->tgt_scan_tick = ticks;
+	return 0;
+}
+
+/*
+ * csio_notify_lnodes - Fan a HW notification out to the lnodes.
+ * @hw: HW module
+ * @note: Notification
+ *
+ * Called from the HW SM for every lnode on hw->sln_head. Since the HW SM
+ * is entered with the lock held, no locking is done here.
+ *
+ * HWREADY starts each lnode, HWRESET/HWREMOVE close it and HWSTOP stops
+ * it; any other notification is ignored.
+ */
+void
+csio_notify_lnodes(struct csio_hw *hw, enum csio_ln_notify note)
+{
+	struct csio_lnode *lnode;
+
+	csio_dbg(hw, "Notifying all nodes of event %d\n", note);
+
+	/* lnodes are chained on sln_head through their sm.sm_list member */
+	list_for_each_entry(lnode, &hw->sln_head, sm.sm_list) {
+		if (note == CSIO_LN_NOTIFY_HWREADY)
+			csio_lnode_start(lnode);
+		else if (note == CSIO_LN_NOTIFY_HWRESET ||
+			 note == CSIO_LN_NOTIFY_HWREMOVE)
+			csio_lnode_close(lnode);
+		else if (note == CSIO_LN_NOTIFY_HWSTOP)
+			csio_lnode_stop(lnode);
+	}
+}
+
+/*
+ * csio_disable_lnodes - Stop or start every lnode on a physical port.
+ * @hw: HW module
+ * @portid: port id
+ * @disable: disable/enable flag.
+ *
+ * If @disable is true, all lnodes hosted on the given physical port are
+ * stopped; otherwise they are started. lnodes on other ports are left
+ * untouched. This routine needs to be called with the hw lock held.
+ */
+void
+csio_disable_lnodes(struct csio_hw *hw, uint8_t portid, bool disable)
+{
+	struct csio_lnode *lnode;
+
+	csio_dbg(hw, "Notifying event to all nodes of port:%d\n", portid);
+
+	/* Walk the sibling lnodes and act on those bound to @portid */
+	list_for_each_entry(lnode, &hw->sln_head, sm.sm_list) {
+		if (lnode->portid == portid) {
+			if (disable)
+				csio_lnode_stop(lnode);
+			else
+				csio_lnode_start(lnode);
+		}
+	}
+}
+
+/*
+ * csio_ln_init - Initialize an lnode.
+ * @ln:		lnode
+ *
+ * Puts the lnode SM in the uninit state, invalidates its flow ids and
+ * sets up the FCF info:
+ *  - root and other physical lnodes allocate their own FCF record and,
+ *    when csio_fdmi_enable is set, the FDMI resources;
+ *  - NPIV lnodes take a reference on their parent's FCF record.
+ *
+ * Returns: 0 on success, -EINVAL on any failure. On failure no FCF record
+ * is left attached to the lnode.
+ */
+static int
+csio_ln_init(struct csio_lnode *ln)
+{
+	int rv = -EINVAL;
+	struct csio_lnode *rln, *pln;
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	csio_init_state(&ln->sm, csio_lns_uninit);
+	ln->vnp_flowid = CSIO_INVALID_IDX;
+	ln->fcf_flowid = CSIO_INVALID_IDX;
+
+	if (csio_is_root_ln(ln)) {
+
+		/* This is the lnode used during initialization */
+
+		ln->fcfinfo = kzalloc(sizeof(struct csio_fcf_info), GFP_KERNEL);
+		if (!ln->fcfinfo) {
+			csio_ln_err(ln, "Failed to alloc FCF record\n");
+			CSIO_INC_STATS(hw, n_err_nomem);
+			goto err;
+		}
+
+		INIT_LIST_HEAD(&ln->fcf_lsthead);
+		kref_init(&ln->fcfinfo->kref);
+
+		if (csio_fdmi_enable && csio_ln_fdmi_init(ln))
+			goto err;
+
+	} else { /* Either a non-root physical or a virtual lnode */
+
+		/*
+		 * The rest is common for non-root physical and NPIV lnodes.
+		 * Just get references to all other modules
+		 */
+		rln = csio_root_lnode(ln);
+
+		if (csio_is_npiv_ln(ln)) {
+			/* NPIV */
+			pln = csio_parent_lnode(ln);
+			kref_get(&pln->fcfinfo->kref);
+			ln->fcfinfo = pln->fcfinfo;
+		} else {
+			/* Another non-root physical lnode (FCF) */
+			ln->fcfinfo = kzalloc(sizeof(struct csio_fcf_info),
+								GFP_KERNEL);
+			if (!ln->fcfinfo) {
+				csio_ln_err(ln,
+					"Failed to alloc FCF info\n");
+				CSIO_INC_STATS(hw, n_err_nomem);
+				goto err;
+			}
+
+			kref_init(&ln->fcfinfo->kref);
+
+			if (csio_fdmi_enable && csio_ln_fdmi_init(ln))
+				goto err;
+		}
+
+	} /* if (!csio_is_root_ln(ln)) */
+
+	return 0;
+err:
+	/*
+	 * Only the two physical-lnode paths jump here, and both own a
+	 * freshly allocated FCF record that nothing else references yet.
+	 * Free it so a failed FDMI setup does not leak it; kfree(NULL) is
+	 * a no-op, which covers the failed-allocation case.
+	 */
+	kfree(ln->fcfinfo);
+	ln->fcfinfo = NULL;
+	return rv;
+}
+
+/*
+ * csio_ln_exit - Undo csio_ln_init() for an lnode.
+ * @ln: lnode
+ *
+ * Cleans up the lnode's rnodes, drops the FCF record reference (taken on
+ * the parent's record for NPIV lnodes, on the lnode's own record
+ * otherwise) and, for non-NPIV lnodes with csio_fdmi_enable set, releases
+ * the FDMI resources.
+ */
+static void
+csio_ln_exit(struct csio_lnode *ln)
+{
+	csio_cleanup_rns(ln);
+
+	if (!csio_is_npiv_ln(ln)) {
+		/* Physical lnode: own FCF record plus optional FDMI state */
+		kref_put(&ln->fcfinfo->kref, csio_free_fcfinfo);
+		if (csio_fdmi_enable)
+			csio_ln_fdmi_exit(ln);
+	} else {
+		/* NPIV lnode: the record belongs to the parent */
+		struct csio_lnode *parent = csio_parent_lnode(ln);
+
+		kref_put(&parent->fcfinfo->kref, csio_free_fcfinfo);
+	}
+
+	ln->fcfinfo = NULL;
+}
+
+/**
+ * csio_lnode_init - Initialize the members of an lnode.
+ * @ln:		lnode
+ * @hw:		HW module the lnode belongs to
+ * @pln:	parent lnode for a child lnode, NULL otherwise
+ *
+ * Links the lnode to @hw and @pln, resets its scan bookkeeping and lists,
+ * runs csio_ln_init() and finally adds the lnode to the parent's children
+ * list (when @pln is set) or to the hw sibling list.
+ *
+ * Returns: 0 on success, -EINVAL if csio_ln_init() fails (the hw link is
+ * cleared again in that case).
+ */
+int
+csio_lnode_init(struct csio_lnode *ln, struct csio_hw *hw,
+		struct csio_lnode *pln)
+{
+	struct list_head *owner_list;
+
+	/* Back-pointers to the hw and, for child lnodes, the parent */
+	csio_lnode_to_hw(ln) = hw;
+	ln->pln = pln;
+
+	/* No targets seen and no scan in progress yet */
+	ln->n_scsi_tgts = 0;
+	ln->last_scan_ntgts = 0;
+	ln->tgt_scan_tick = 0;
+
+	/* Empty rnode and children lists */
+	INIT_LIST_HEAD(&ln->rnhead);
+	INIT_LIST_HEAD(&ln->cln_head);
+
+	/* Inherit the debug log level from the hw */
+	ln->params.log_level = hw->params.log_level;
+
+	if (csio_ln_init(ln)) {
+		csio_lnode_to_hw(ln) = NULL;
+		return -EINVAL;
+	}
+
+	/* Children hang off the parent, everything else off the hw */
+	owner_list = pln ? &pln->cln_head : &hw->sln_head;
+
+	spin_lock_irq(&hw->lock);
+	list_add_tail(&ln->sm.sm_list, owner_list);
+	if (pln)
+		pln->num_vports++;
+	spin_unlock_irq(&hw->lock);
+
+	hw->num_lns++;
+
+	return 0;
+}
+
+/**
+ * csio_lnode_exit - De-instantiate an lnode.
+ * @ln:		lnode
+ *
+ * Runs csio_ln_exit(), unlinks the lnode from the list it is on, fixes up
+ * the parent's vport count and the hw root-lnode pointer under the hw
+ * lock, and finally detaches the lnode from the hw.
+ */
+void
+csio_lnode_exit(struct csio_lnode *ln)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	csio_ln_exit(ln);
+
+	spin_lock_irq(&hw->lock);
+
+	/* Take this lnode off its list */
+	list_del_init(&ln->sm.sm_list);
+
+	/* A child lnode also gives back its slot in the parent's count */
+	if (ln->pln)
+		ln->pln->num_vports--;
+
+	/* The root lnode is whatever now heads sln_head, if anything */
+	hw->rln = list_empty(&hw->sln_head) ? NULL :
+			(struct csio_lnode *)csio_list_next(&hw->sln_head);
+
+	spin_unlock_irq(&hw->lock);
+
+	hw->num_lns--;
+	csio_lnode_to_hw(ln) = NULL;
+}
diff --git a/drivers/scsi/csiostor/csio_rnode.c b/drivers/scsi/csiostor/csio_rnode.c
new file mode 100644
index 0000000..5e224a0
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_rnode.c
@@ -0,0 +1,889 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/string.h>
+#include <scsi/scsi_transport_fc.h>
+#include <scsi/fc/fc_els.h>
+#include <scsi/fc/fc_fs.h>
+
+#include "csio_hw.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+
+static int csio_rnode_init(struct csio_rnode *, struct csio_lnode *);
+static void csio_rnode_exit(struct csio_rnode *);
+
+/* State machine forward declarations */
+static void csio_rns_uninit(struct csio_rnode *, enum csio_rn_ev);
+static void csio_rns_ready(struct csio_rnode *, enum csio_rn_ev);
+static void csio_rns_offline(struct csio_rnode *, enum csio_rn_ev);
+static void csio_rns_disappeared(struct csio_rnode *, enum csio_rn_ev);
+
+/*
+ * RNF event mapping: indexed by the firmware rdev event code, yields the
+ * rnode SM event to post. Firmware events with no rnode SM counterpart
+ * map to CSIO_RNFE_NONE. The table is read-only.
+ */
+static const enum csio_rn_ev fwevt_to_rnevt[] = {
+	CSIO_RNFE_NONE,		/* None */
+	CSIO_RNFE_LOGGED_IN,	/* PLOGI_ACC_RCVD  */
+	CSIO_RNFE_NONE,		/* PLOGI_RJT_RCVD  */
+	CSIO_RNFE_PLOGI_RECV,	/* PLOGI_RCVD	   */
+	CSIO_RNFE_LOGO_RECV,	/* PLOGO_RCVD	   */
+	CSIO_RNFE_PRLI_DONE,	/* PRLI_ACC_RCVD   */
+	CSIO_RNFE_NONE,		/* PRLI_RJT_RCVD   */
+	CSIO_RNFE_PRLI_RECV,	/* PRLI_RCVD	   */
+	CSIO_RNFE_PRLO_RECV,	/* PRLO_RCVD	   */
+	CSIO_RNFE_NONE,		/* NPORT_ID_CHGD   */
+	CSIO_RNFE_LOGO_RECV,	/* FLOGO_RCVD	   */
+	CSIO_RNFE_NONE,		/* CLR_VIRT_LNK_RCVD */
+	CSIO_RNFE_LOGGED_IN,	/* FLOGI_ACC_RCVD   */
+	CSIO_RNFE_NONE,		/* FLOGI_RJT_RCVD   */
+	CSIO_RNFE_LOGGED_IN,	/* FDISC_ACC_RCVD   */
+	CSIO_RNFE_NONE,		/* FDISC_RJT_RCVD   */
+	CSIO_RNFE_NONE,		/* FLOGI_TMO_MAX_RETRY */
+	CSIO_RNFE_NONE,		/* IMPL_LOGO_ADISC_ACC */
+	CSIO_RNFE_NONE,		/* IMPL_LOGO_ADISC_RJT */
+	CSIO_RNFE_NONE,		/* IMPL_LOGO_ADISC_CNFLT */
+	CSIO_RNFE_NONE,		/* PRLI_TMO		*/
+	CSIO_RNFE_NONE,		/* ADISC_TMO		*/
+	CSIO_RNFE_NAME_MISSING,	/* RSCN_DEV_LOST  */
+	CSIO_RNFE_NONE,		/* SCR_ACC_RCVD	*/
+	CSIO_RNFE_NONE,		/* ADISC_RJT_RCVD */
+	CSIO_RNFE_NONE,		/* LOGO_SNT */
+	CSIO_RNFE_LOGO_RECV,	/* PROTO_ERR_IMPL_LOGO */
+};
+
+/*
+ * Translate a firmware event into an rnode SM event. Codes beyond the
+ * last table entry (PROTO_ERR_IMPL_LOGO) yield CSIO_RNFE_NONE. The
+ * argument is parenthesized so that expression arguments expand safely;
+ * note that it is evaluated twice.
+ */
+#define CSIO_FWE_TO_RNFE(_evt)	(((_evt) > PROTO_ERR_IMPL_LOGO) ?	\
+						CSIO_RNFE_NONE :	\
+						fwevt_to_rnevt[(_evt)])
+/*
+ * csio_is_rnode_ready - Test whether the rnode SM is in the ready state.
+ * @rn: rnode
+ *
+ * Returns the result of csio_match_state() against csio_rns_ready.
+ */
+int
+csio_is_rnode_ready(struct csio_rnode *rn)
+{
+	return csio_match_state(rn, csio_rns_ready);
+}
+
+/*
+ * csio_is_rnode_uninit - Test whether the rnode SM is in the uninit state.
+ * @rn: rnode
+ *
+ * Returns the result of csio_match_state() against csio_rns_uninit.
+ */
+static int
+csio_is_rnode_uninit(struct csio_rnode *rn)
+{
+	return csio_match_state(rn, csio_rns_uninit);
+}
+
+/*
+ * csio_rn_lookup - Finds the rnode with the given flowid
+ * @ln: lnode
+ * @flowid: flowid.
+ *
+ * Walks the lnode's rnode list and returns the first rnode whose flowid
+ * equals @flowid, or NULL if there is none.
+ */
+static struct csio_rnode *
+csio_rn_lookup(struct csio_lnode *ln, uint32_t flowid)
+{
+	struct csio_rnode *cur;
+
+	/* rnodes are chained on ln->rnhead through their sm.sm_list member */
+	list_for_each_entry(cur, &ln->rnhead, sm.sm_list) {
+		if (cur->flowid == flowid)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * csio_rn_lookup_wwpn - Finds the rnode with the given wwpn
+ * @ln: lnode
+ * @wwpn: 8-byte world wide port name to look for
+ *
+ * Walks the lnode's rnode list and returns the first rnode whose wwpn
+ * matches @wwpn byte for byte, or NULL if there is none.
+ */
+static struct csio_rnode *
+csio_rn_lookup_wwpn(struct csio_lnode *ln, uint8_t *wwpn)
+{
+	struct csio_rnode *cur;
+
+	/* rnodes are chained on ln->rnhead through their sm.sm_list member */
+	list_for_each_entry(cur, &ln->rnhead, sm.sm_list) {
+		if (memcmp(csio_rn_wwpn(cur), wwpn, 8) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/**
+ * csio_rnode_lookup_portid - Finds the rnode with the given portid
+ * @ln:		lnode
+ * @portid:	port id
+ *
+ * Walks the lnode's rnode list and returns the first rnode whose nport_id
+ * equals @portid, or NULL if there is none.
+ */
+struct csio_rnode *
+csio_rnode_lookup_portid(struct csio_lnode *ln, uint32_t portid)
+{
+	struct csio_rnode *cur;
+
+	/* rnodes are chained on ln->rnhead through their sm.sm_list member */
+	list_for_each_entry(cur, &ln->rnhead, sm.sm_list) {
+		if (cur->nport_id == portid)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * csio_rn_dup_flowid - Check whether a flowid is active on another lnode.
+ * @ln: lnode the event arrived on (skipped during the search)
+ * @rdev_flowid: remote device flowid to look for
+ * @vnp_flowid: set to the flowid of the lnode owning the duplicate
+ *
+ * Scans the rnodes of every other lnode on hw->sln_head for a ready rnode
+ * carrying @rdev_flowid.
+ *
+ * Returns: 1 if such an rnode exists (and *@vnp_flowid is filled in),
+ *	    0 otherwise (*@vnp_flowid is left untouched).
+ */
+static int
+csio_rn_dup_flowid(struct csio_lnode *ln, uint32_t rdev_flowid,
+		    uint32_t *vnp_flowid)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_lnode *other;
+	struct csio_rnode *cur;
+
+	list_for_each_entry(other, &hw->sln_head, sm.sm_list) {
+		/* Only other lnodes can hold a duplicate */
+		if (other == ln)
+			continue;
+
+		list_for_each_entry(cur, &other->rnhead, sm.sm_list) {
+			if (csio_is_rnode_ready(cur) &&
+			    cur->flowid == rdev_flowid) {
+				*vnp_flowid = csio_ln_flowid(other);
+				return 1;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * csio_alloc_rnode - Allocate and initialize an rnode for an lnode.
+ * @ln: lnode that will own the rnode
+ *
+ * Takes a zeroed rnode from the hw rnode mempool (GFP_ATOMIC) and runs
+ * csio_rnode_init() on it.
+ *
+ * Returns: the new rnode, or NULL if the allocation or initialization
+ * failed (n_rnode_nomem is bumped in both failure cases).
+ */
+static struct csio_rnode *
+csio_alloc_rnode(struct csio_lnode *ln)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_rnode *rn;
+
+	rn = mempool_alloc(hw->rnode_mempool, GFP_ATOMIC);
+	if (rn) {
+		memset(rn, 0, sizeof(*rn));
+		if (!csio_rnode_init(rn, ln)) {
+			CSIO_INC_STATS(ln, n_rnode_alloc);
+			return rn;
+		}
+
+		/* Initialization failed: hand the element back */
+		mempool_free(rn, hw->rnode_mempool);
+	}
+
+	CSIO_INC_STATS(ln, n_rnode_nomem);
+	return NULL;
+}
+
+/*
+ * csio_free_rnode - Tear down an rnode and return it to the mempool.
+ * @rn: rnode
+ *
+ * Runs csio_rnode_exit(), bumps the owning lnode's n_rnode_free counter
+ * and gives the rnode back to hw->rnode_mempool. The rnode must not be
+ * used after this call.
+ */
+static void
+csio_free_rnode(struct csio_rnode *rn)
+{
+	/* Resolve hw up front; rn is gone once mempool_free() has run */
+	struct csio_hw *hw = csio_lnode_to_hw(csio_rnode_to_lnode(rn));
+
+	csio_rnode_exit(rn);
+	CSIO_INC_STATS(rn->lnp, n_rnode_free);
+	mempool_free(rn, hw->rnode_mempool);
+}
+
+/*
+ * csio_get_rnode - Gets rnode with the given flowid
+ * @ln: lnode
+ * @flowid: flow id.
+ *
+ * Looks the flowid up on the given lnode. If no matching rnode exists, a
+ * new rnode is allocated, tagged with @flowid and returned.
+ *
+ * Returns: the rnode, or NULL if a new one could not be allocated.
+ */
+static struct csio_rnode *
+csio_get_rnode(struct csio_lnode *ln, uint32_t flowid)
+{
+	struct csio_rnode *rn = csio_rn_lookup(ln, flowid);
+
+	/* Already known */
+	if (rn)
+		return rn;
+
+	rn = csio_alloc_rnode(ln);
+	if (rn)
+		rn->flowid = flowid;
+
+	return rn;
+}
+
+/*
+ * csio_put_rnode - Frees the given rnode
+ * @ln: lnode (not used by this function)
+ * @rn: rnode to free
+ *
+ * Asserts (via CSIO_DB_ASSERT) that the rnode SM is in the uninit state
+ * and then releases the rnode with csio_free_rnode(). The rnode must not
+ * be used after this call.
+ */
+void
+csio_put_rnode(struct csio_lnode *ln, struct csio_rnode *rn)
+{
+	CSIO_DB_ASSERT(csio_is_rnode_uninit(rn) != 0);
+	csio_free_rnode(rn);
+}
+
+/*
+ * csio_confirm_rnode - confirms rnode based on wwpn.
+ * @ln: lnode
+ * @rdev_flowid: remote device flowid
+ * @rdevp: remote device params
+ *
+ * Resolves the rnode an rdev event refers to:
+ *  - events for the fabric controller port are dropped;
+ *  - if no rnode carries @rdev_flowid, the event is dropped when the
+ *    flowid is already active on another lnode; otherwise an rnode with
+ *    the same wwpn is reused (NPORTs only) or a new one is allocated;
+ *  - if an rnode carries @rdev_flowid, it is reused when its role (fabric
+ *    ports) or wwpn (NPORTs) still matches; on a mismatch its flowid is
+ *    invalidated and either the rnode owning the wwpn is used or a new
+ *    rnode is allocated.
+ * The chosen rnode gets @rdevp recorded as its rdev entry.
+ *
+ * Returns: the rnode, or NULL if the event is dropped or allocation fails.
+ */
+struct csio_rnode *
+csio_confirm_rnode(struct csio_lnode *ln, uint32_t rdev_flowid,
+		   struct fcoe_rdev_entry *rdevp)
+{
+	uint8_t rport_type;
+	struct csio_rnode *rn, *match_rn;
+	uint32_t vnp_flowid;
+
+	rport_type =
+		FW_RDEV_WR_RPORT_TYPE_GET(rdevp->rd_xfer_rdy_to_rport_type);
+	/* Drop rdev event for cntrl port */
+	if (rport_type == FAB_CTLR_VNPORT) {
+		csio_ln_dbg(ln,
+			    "Unhandled rport_type:%d recv in rdev evt "
+			    "ssni:x%x\n", rport_type, rdev_flowid);
+		return NULL;
+	}
+
+	/* Lookup on flowid */
+	rn = csio_rn_lookup(ln, rdev_flowid);
+	if (!rn) {
+
+		/* Drop events with duplicate flowid */
+		if (csio_rn_dup_flowid(ln, rdev_flowid, &vnp_flowid)) {
+			csio_ln_warn(ln,
+				     "ssni:%x already active on vnpi:%x\n",
+				     rdev_flowid, vnp_flowid);
+			return NULL;
+		}
+
+		/* skip wwpn lookup for fabric ports, cntrl port */
+		if (rport_type == FLOGI_VFPORT || rport_type == FDISC_VFPORT
+		    || rport_type == FAB_CTLR_VNPORT) {
+			goto alloc_rnode;
+		}
+
+		/* Lookup on wwpn for NPORTs */
+		rn = csio_rn_lookup_wwpn(ln, rdevp->wwpn);
+		if (!rn)
+			goto alloc_rnode;
+
+		/* found rn */
+		goto found_rnode;
+	} else {
+		/* verify rnode found for fabric ports, cntrl port */
+		if (rport_type == FLOGI_VFPORT || rport_type == FDISC_VFPORT
+		    || rport_type == FAB_CTLR_VNPORT) {
+
+			/* Rnode role mismatch. Allocate new rnode */
+			if (rn->role == CSIO_RNFR_NS ||
+			    rn->role == CSIO_RNFR_NPORT) {
+				csio_ln_dbg(ln,
+					"rnode role mismatch found ssni:x%x "
+					"role:%d new_type:%d\n",
+					rdev_flowid, rn->role, rport_type);
+				if (csio_is_rnode_ready(rn)) {
+					csio_ln_warn(ln,
+						     "rnode is already "
+						     "active ssni:x%x\n",
+						     rdev_flowid);
+					CSIO_DB_ASSERT(0);
+				}
+				/* Detach the stale rnode from this flowid */
+				csio_rn_flowid(rn) = CSIO_INVALID_IDX;
+				goto alloc_rnode;
+			} else
+				goto found_rnode;
+		}
+
+		/* wwpn match */
+		if (!memcmp(csio_rn_wwpn(rn), rdevp->wwpn, 8)) {
+			/* Update rn */
+			goto found_rnode;
+		}
+
+		/* Search for rnode that have same wwpn */
+		match_rn = csio_rn_lookup_wwpn(ln, rdevp->wwpn);
+		if (match_rn != NULL) {
+			csio_ln_dbg(ln,
+				"ssni:x%x changed for rport name(wwpn):%llx "
+				"did:x%x\n", rdev_flowid,
+				wwn_to_u64(rdevp->wwpn),
+				match_rn->nport_id);
+			/* The flowid moves from rn to the wwpn's rnode */
+			csio_rn_flowid(rn) = CSIO_INVALID_IDX;
+			rn = match_rn;
+			CSIO_INC_STATS(ln, n_rnode_match);
+		} else {
+			csio_ln_dbg(ln,
+				"rnode wwpn mismatch found ssni:x%x "
+				"name(wwpn):%llx\n",
+				rdev_flowid,
+				wwn_to_u64(csio_rn_wwpn(rn)));
+			if (csio_is_rnode_ready(rn)) {
+				csio_ln_warn(ln,
+					     "rnode is already active "
+					     "wwpn:%llx ssni:x%x\n",
+					     wwn_to_u64(csio_rn_wwpn(rn)),
+					     rdev_flowid);
+				CSIO_DB_ASSERT(0);
+			}
+			csio_rn_flowid(rn) = CSIO_INVALID_IDX;
+			goto alloc_rnode;
+		}
+	}
+
+found_rnode:
+	csio_ln_dbg(ln, "found rnode:%p ssni:x%x name(wwpn):%llx\n",
+		rn, rdev_flowid, wwn_to_u64(rdevp->wwpn));
+
+	/* Update flowid */
+	csio_rn_flowid(rn) = rdev_flowid;
+
+	/* update rdev entry */
+	rn->rdev_entry = rdevp;
+	return rn;
+
+alloc_rnode:
+	rn = csio_get_rnode(ln, rdev_flowid);
+	if (!rn)
+		return NULL;
+
+	csio_ln_dbg(ln, "alloc rnode:%p ssni:x%x name(wwpn):%llx\n",
+		rn, rdev_flowid, wwn_to_u64(rdevp->wwpn));
+
+	/* update rdev entry */
+	rn->rdev_entry = rdevp;
+	return rn;
+}
+
+/*
+ * csio_rn_verify_rparams - verify rparams.
+ * @ln: lnode
+ * @rn: rnode
+ * @rdevp: remote device params
+ *
+ * Derives the rnode role from the remote port type in @rdevp, checks the
+ * port id expected for fabric and name-server ports, rejects all-zero
+ * wwnn/wwpn for name-server and regular remote ports, and then copies the
+ * port id, names, receive frame size and class of service into @rn.
+ *
+ * Note that rn->role (and, for PRLI events, rn->fcp_flags) may already
+ * have been updated when a later check fails.
+ *
+ * Returns: 0 if the parameters are valid, -EINVAL otherwise.
+ */
+static int
+csio_rn_verify_rparams(struct csio_lnode *ln, struct csio_rnode *rn,
+			struct fcoe_rdev_entry *rdevp)
+{
+	uint8_t null[8];
+	uint8_t rport_type;
+	uint8_t fc_class;
+	uint32_t *did;
+	uint32_t nport_id;
+
+	did = (uint32_t *) &rdevp->r_id[0];
+	/*
+	 * The port id is taken from the upper bits of the big-endian word
+	 * at r_id. Compute it once so that the checks, the log messages and
+	 * the value stored in the rnode all agree.
+	 */
+	nport_id = (ntohl(*did) >> 8) & CSIO_DID_MASK;
+	rport_type =
+		FW_RDEV_WR_RPORT_TYPE_GET(rdevp->rd_xfer_rdy_to_rport_type);
+	switch (rport_type) {
+	case FLOGI_VFPORT:
+		rn->role = CSIO_RNFR_FABRIC;
+		if (nport_id != FC_FID_FLOGI) {
+			csio_ln_err(ln, "ssni:x%x invalid fabric portid\n",
+				csio_rn_flowid(rn));
+			return -EINVAL;
+		}
+		/* NPIV support */
+		if (FW_RDEV_WR_NPIV_GET(rdevp->vft_to_qos))
+			ln->flags |= CSIO_LNF_NPIVSUPP;
+
+		break;
+
+	case NS_VNPORT:
+		rn->role = CSIO_RNFR_NS;
+		if (nport_id != FC_FID_DIR_SERV) {
+			csio_ln_err(ln, "ssni:x%x invalid fabric portid\n",
+				csio_rn_flowid(rn));
+			return -EINVAL;
+		}
+		break;
+
+	case REG_FC4_VNPORT:
+	case REG_VNPORT:
+		rn->role = CSIO_RNFR_NPORT;
+		/* FCP capabilities are only reported with PRLI events */
+		if (rdevp->event_cause == PRLI_ACC_RCVD ||
+			rdevp->event_cause == PRLI_RCVD) {
+			if (FW_RDEV_WR_TASK_RETRY_ID_GET(
+							rdevp->enh_disc_to_tgt))
+				rn->fcp_flags |= FCP_SPPF_OVLY_ALLOW;
+
+			if (FW_RDEV_WR_RETRY_GET(rdevp->enh_disc_to_tgt))
+				rn->fcp_flags |= FCP_SPPF_RETRY;
+
+			if (FW_RDEV_WR_CONF_CMPL_GET(rdevp->enh_disc_to_tgt))
+				rn->fcp_flags |= FCP_SPPF_CONF_COMPL;
+
+			if (FW_RDEV_WR_TGT_GET(rdevp->enh_disc_to_tgt))
+				rn->role |= CSIO_RNFR_TARGET;
+
+			if (FW_RDEV_WR_INI_GET(rdevp->enh_disc_to_tgt))
+				rn->role |= CSIO_RNFR_INITIATOR;
+		}
+
+		break;
+
+	case FDMI_VNPORT:
+	case FAB_CTLR_VNPORT:
+		rn->role = 0;
+		break;
+
+	default:
+		csio_ln_err(ln, "ssni:x%x invalid rport type recv x%x\n",
+			csio_rn_flowid(rn), rport_type);
+		return -EINVAL;
+	}
+
+	/* validate wwpn/wwnn for Name server/remote port */
+	if (rport_type == REG_VNPORT || rport_type == NS_VNPORT) {
+		memset(null, 0, 8);
+		if (!memcmp(rdevp->wwnn, null, 8)) {
+			csio_ln_err(ln,
+				    "ssni:x%x invalid wwnn received from"
+				    " rport did:x%x\n",
+				    csio_rn_flowid(rn), nport_id);
+			return -EINVAL;
+		}
+
+		if (!memcmp(rdevp->wwpn, null, 8)) {
+			csio_ln_err(ln,
+				    "ssni:x%x invalid wwpn received from"
+				    " rport did:x%x\n",
+				    csio_rn_flowid(rn), nport_id);
+			return -EINVAL;
+		}
+
+	}
+
+	/* Copy wwnn, wwpn and nport id */
+	rn->nport_id = nport_id;
+	memcpy(csio_rn_wwnn(rn), rdevp->wwnn, 8);
+	memcpy(csio_rn_wwpn(rn), rdevp->wwpn, 8);
+	rn->rn_sparm.csp.sp_bb_data = ntohs(rdevp->rcv_fr_sz);
+	fc_class = FW_RDEV_WR_CLASS_GET(rdevp->vft_to_qos);
+	/*
+	 * The class is 1-based; a value of 0 would index clsp[-1] and
+	 * scribble over the preceding field, so skip it.
+	 * NOTE(review): the upper bound of clsp[] is not visible from here -
+	 * confirm the firmware field cannot exceed the array size.
+	 */
+	if (fc_class)
+		rn->rn_sparm.clsp[fc_class - 1].cp_class = htons(FC_CPC_VALID);
+	return 0;
+}
+
+/*
+ * __csio_reg_rnode - Register an rnode with hw->lock temporarily dropped.
+ * @rn: rnode
+ *
+ * Expects hw->lock to be held on entry: the lock is released around
+ * csio_reg_rnode() and re-acquired before returning. If the rnode is the
+ * management server (FC_FID_MGMT_SERV), FDMI is started on the lnode
+ * afterwards, with the lock held again.
+ */
+static void
+__csio_reg_rnode(struct csio_rnode *rn)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(csio_rnode_to_lnode(rn));
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+
+	/* csio_reg_rnode() is called without the hw lock */
+	spin_unlock_irq(&hw->lock);
+	csio_reg_rnode(rn);
+	spin_lock_irq(&hw->lock);
+
+	if (rn->nport_id == FC_FID_MGMT_SERV)
+		csio_ln_fdmi_start(ln, (void *) rn);
+}
+
+/*
+ * __csio_unreg_rnode - Unregister an rnode with hw->lock temporarily dropped.
+ * @rn: rnode
+ *
+ * Expects hw->lock to be held on entry. Any I/Os parked on the rnode's
+ * host completion queue are first moved to a private list while the lock
+ * is still held; the lock is then released around csio_unreg_rnode() and
+ * re-acquired, after which the moved I/Os are cleaned up.
+ */
+static void
+__csio_unreg_rnode(struct csio_rnode *rn)
+{
+	struct csio_hw *hw = csio_lnode_to_hw(csio_rnode_to_lnode(rn));
+	LIST_HEAD(tmp_q);
+	int cmpl = 0;
+
+	/* Detach pending completions before the lock is dropped */
+	if (!list_empty(&rn->host_cmpl_q)) {
+		csio_dbg(hw, "Returning completion queue I/Os\n");
+		list_splice_tail_init(&rn->host_cmpl_q, &tmp_q);
+		cmpl = 1;
+	}
+
+	/* csio_unreg_rnode() is called without the hw lock */
+	spin_unlock_irq(&hw->lock);
+	csio_unreg_rnode(rn);
+	spin_lock_irq(&hw->lock);
+
+	/* Cleanup I/Os that were waiting for rnode to unregister */
+	if (cmpl)
+		csio_scsi_cleanup_io_q(csio_hw_to_scsim(hw), &tmp_q);
+
+}
+
+/*****************************************************************************/
+/* START: Rnode SM                                                           */
+/*****************************************************************************/
+
+/*
+ * csio_rns_uninit - Rnode SM handler for the uninit state.
+ * @rn - rnode
+ * @evt - SM event.
+ *
+ * LOGGED_IN / PLOGI_RECV: verify the remote parameters; on success move
+ * to the ready state and register the rnode, otherwise count the error
+ * and stay in uninit.
+ * LOGO_RECV: ignored (counted as dropped).
+ * Anything else is counted as unexpected.
+ */
+static void
+csio_rns_uninit(struct csio_rnode *rn, enum csio_rn_ev evt)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	int ret = 0;
+
+	CSIO_INC_STATS(rn, n_evt_sm[evt]);
+
+	switch (evt) {
+	case CSIO_RNFE_LOGGED_IN:
+	case CSIO_RNFE_PLOGI_RECV:
+		ret = csio_rn_verify_rparams(ln, rn, rn->rdev_entry);
+		if (!ret) {
+			/* State is switched before registration runs */
+			csio_set_state(&rn->sm, csio_rns_ready);
+			__csio_reg_rnode(rn);
+		} else {
+			CSIO_INC_STATS(rn, n_err_inval);
+		}
+		break;
+	case CSIO_RNFE_LOGO_RECV:
+		csio_ln_dbg(ln,
+			    "ssni:x%x Ignoring event %d recv "
+			    "in rn state[uninit]\n", csio_rn_flowid(rn), evt);
+		CSIO_INC_STATS(rn, n_evt_drop);
+		break;
+	default:
+		csio_ln_dbg(ln,
+			    "ssni:x%x unexp event %d recv "
+			    "in rn state[uninit]\n", csio_rn_flowid(rn), evt);
+		CSIO_INC_STATS(rn, n_evt_unexp);
+		break;
+	}
+}
+
+/*
+ * csio_rns_ready - Rnode SM handler for the ready state.
+ * @rn - rnode
+ * @evt - SM event.
+ *
+ * LOGGED_IN / PLOGI_RECV: ignored (counted as dropped).
+ * PRLI_DONE / PRLI_RECV: re-verify the remote parameters and register
+ * the rnode again on success.
+ * DOWN / LOGO_RECV: move to offline and unregister the rnode.
+ * CLOSE: move to uninit and unregister the rnode.
+ * NAME_MISSING: move to disappeared and unregister the rnode.
+ * Anything else is counted as unexpected.
+ */
+static void
+csio_rns_ready(struct csio_rnode *rn, enum csio_rn_ev evt)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	int ret = 0;
+
+	CSIO_INC_STATS(rn, n_evt_sm[evt]);
+
+	switch (evt) {
+	case CSIO_RNFE_LOGGED_IN:
+	case CSIO_RNFE_PLOGI_RECV:
+		csio_ln_dbg(ln,
+			"ssni:x%x Ignoring event %d recv from did:x%x "
+			"in rn state[ready]\n", csio_rn_flowid(rn), evt,
+			rn->nport_id);
+		CSIO_INC_STATS(rn, n_evt_drop);
+		break;
+
+	case CSIO_RNFE_PRLI_DONE:
+	case CSIO_RNFE_PRLI_RECV:
+		ret = csio_rn_verify_rparams(ln, rn, rn->rdev_entry);
+		if (!ret)
+			__csio_reg_rnode(rn);
+		else
+			CSIO_INC_STATS(rn, n_err_inval);
+
+		break;
+
+	case CSIO_RNFE_DOWN:
+	case CSIO_RNFE_LOGO_RECV:
+		csio_set_state(&rn->sm, csio_rns_offline);
+		__csio_unreg_rnode(rn);
+
+		/* FW expected to internally aborted outstanding SCSI WRs
+		 * and return all SCSI WRs to host with status "ABORTED".
+		 */
+		break;
+
+	case CSIO_RNFE_CLOSE:
+		/*
+		 * Each rnode receives CLOSE event when driver is removed or
+		 * device is reset
+		 * Note: All outstanding IOs on remote port need to returned
+		 * to upper layer with appropriate error before sending
+		 * CLOSE event
+		 */
+		csio_set_state(&rn->sm, csio_rns_uninit);
+		__csio_unreg_rnode(rn);
+		break;
+
+	case CSIO_RNFE_NAME_MISSING:
+		csio_set_state(&rn->sm, csio_rns_disappeared);
+		__csio_unreg_rnode(rn);
+
+		/*
+		 * FW expected to internally aborted outstanding SCSI WRs
+		 * and return all SCSI WRs to host with status "ABORTED".
+		 */
+
+		break;
+
+	default:
+		/* Report the state this handler actually serves */
+		csio_ln_dbg(ln,
+			"ssni:x%x unexp event %d recv from did:x%x "
+			"in rn state[ready]\n", csio_rn_flowid(rn), evt,
+			rn->nport_id);
+		CSIO_INC_STATS(rn, n_evt_unexp);
+		break;
+	}
+}
+
+/*
+ * csio_rns_offline - Rnode SM handler for the offline state.
+ * @rn - rnode
+ * @evt - SM event.
+ *
+ * LOGGED_IN / PLOGI_RECV: verify the remote parameters; on success move
+ * to ready and register the rnode, otherwise count the error and post a
+ * CLOSE event to the SM.
+ * DOWN: ignored (counted as dropped).
+ * CLOSE: move to uninit.
+ * NAME_MISSING: move to disappeared.
+ * Anything else is counted as unexpected.
+ */
+static void
+csio_rns_offline(struct csio_rnode *rn, enum csio_rn_ev evt)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	int ret = 0;
+
+	CSIO_INC_STATS(rn, n_evt_sm[evt]);
+
+	switch (evt) {
+	case CSIO_RNFE_LOGGED_IN:
+	case CSIO_RNFE_PLOGI_RECV:
+		ret = csio_rn_verify_rparams(ln, rn, rn->rdev_entry);
+		if (!ret) {
+			csio_set_state(&rn->sm, csio_rns_ready);
+			__csio_reg_rnode(rn);
+		} else {
+			CSIO_INC_STATS(rn, n_err_inval);
+			/* Bad parameters: close the rnode via the SM */
+			csio_post_event(&rn->sm, CSIO_RNFE_CLOSE);
+		}
+		break;
+
+	case CSIO_RNFE_DOWN:
+		csio_ln_dbg(ln,
+			"ssni:x%x Ignoring event %d recv from did:x%x "
+			"in rn state[offline]\n", csio_rn_flowid(rn), evt,
+			rn->nport_id);
+		CSIO_INC_STATS(rn, n_evt_drop);
+		break;
+
+	case CSIO_RNFE_CLOSE:
+		/* Each rnode receives CLOSE event when driver is removed or
+		 * device is reset
+		 * Note: All outstanding IOs on remote port need to be returned
+		 * to upper layer with appropriate error before sending
+		 * CLOSE event
+		 */
+		csio_set_state(&rn->sm, csio_rns_uninit);
+		break;
+
+	case CSIO_RNFE_NAME_MISSING:
+		csio_set_state(&rn->sm, csio_rns_disappeared);
+		break;
+
+	default:
+		csio_ln_dbg(ln,
+			"ssni:x%x unexp event %d recv from did:x%x "
+			"in rn state[offline]\n", csio_rn_flowid(rn), evt,
+			rn->nport_id);
+		CSIO_INC_STATS(rn, n_evt_unexp);
+		break;
+	}
+}
+
+/*
+ * csio_rns_disappeared - Rnode SM handler for the disappeared state.
+ * @rn - rnode
+ * @evt - SM event.
+ *
+ * LOGGED_IN / PLOGI_RECV: verify the remote parameters; on success move
+ * to ready and register the rnode, otherwise count the error and post a
+ * CLOSE event to the SM.
+ * CLOSE: move to uninit.
+ * DOWN / NAME_MISSING: ignored.
+ * Anything else is counted as unexpected.
+ */
+static void
+csio_rns_disappeared(struct csio_rnode *rn, enum csio_rn_ev evt)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	int ret = 0;
+
+	CSIO_INC_STATS(rn, n_evt_sm[evt]);
+
+	switch (evt) {
+	case CSIO_RNFE_LOGGED_IN:
+	case CSIO_RNFE_PLOGI_RECV:
+		ret = csio_rn_verify_rparams(ln, rn, rn->rdev_entry);
+		if (!ret) {
+			csio_set_state(&rn->sm, csio_rns_ready);
+			__csio_reg_rnode(rn);
+		} else {
+			CSIO_INC_STATS(rn, n_err_inval);
+			/* Bad parameters: close the rnode via the SM */
+			csio_post_event(&rn->sm, CSIO_RNFE_CLOSE);
+		}
+		break;
+
+	case CSIO_RNFE_CLOSE:
+		/* Each rnode receives CLOSE event when driver is removed or
+		 * device is reset.
+		 * Note: All outstanding IOs on remote port need to be returned
+		 * to upper layer with appropriate error before sending
+		 * CLOSE event
+		 */
+		csio_set_state(&rn->sm, csio_rns_uninit);
+		break;
+
+	case CSIO_RNFE_DOWN:
+	case CSIO_RNFE_NAME_MISSING:
+		/* Trailing space keeps the did and "in rn state" apart */
+		csio_ln_dbg(ln,
+			"ssni:x%x Ignoring event %d recv from did x%x "
+			"in rn state[disappeared]\n", csio_rn_flowid(rn),
+			evt, rn->nport_id);
+		break;
+
+	default:
+		csio_ln_dbg(ln,
+			"ssni:x%x unexp event %d recv from did x%x "
+			"in rn state[disappeared]\n", csio_rn_flowid(rn),
+			evt, rn->nport_id);
+		CSIO_INC_STATS(rn, n_evt_unexp);
+		break;
+	}
+}
+
+/*****************************************************************************/
+/* END: Rnode SM                                                             */
+/*****************************************************************************/
+
+/*
+ * csio_rnode_devloss_handler - Device loss event handler
+ * @rn: rnode
+ *
+ * Does nothing if the rnode is in the ready state again. Otherwise posts
+ * a CLOSE event to the rnode SM and frees the rnode if that leaves it in
+ * the uninit state; in that case @rn must not be used afterwards.
+ */
+void
+csio_rnode_devloss_handler(struct csio_rnode *rn)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+
+	/* ignore if same rnode came back as online */
+	if (csio_is_rnode_ready(rn))
+		return;
+
+	csio_post_event(&rn->sm, CSIO_RNFE_CLOSE);
+
+	/* Free rn if in uninit state */
+	if (csio_is_rnode_uninit(rn))
+		csio_put_rnode(ln, rn);
+}
+
+/**
+ * csio_rnode_fwevt_handler - Event handler for firmware rnode events.
+ * @rn:		rnode
+ * @fwevt:	firmware rdev event code
+ *
+ * Translates @fwevt into an rnode SM event. Events that map to
+ * CSIO_RNFE_NONE are logged, counted as unexpected and dropped. Otherwise
+ * the event is counted, recorded as the rnode's current event and posted
+ * to the SM. If the rnode is in the uninit state afterwards it is freed,
+ * so @rn must not be used after this call unless the caller knows it
+ * survived.
+ */
+void
+csio_rnode_fwevt_handler(struct csio_rnode *rn, uint8_t fwevt)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	enum csio_rn_ev evt;
+
+	evt = CSIO_FWE_TO_RNFE(fwevt);
+	/* Treats CSIO_RNFE_NONE as zero; also hit for out-of-range codes */
+	if (!evt) {
+		csio_ln_err(ln, "ssni:x%x Unhandled FW Rdev event: %d\n",
+			    csio_rn_flowid(rn), fwevt);
+		CSIO_INC_STATS(rn, n_evt_unexp);
+		return;
+	}
+	CSIO_INC_STATS(rn, n_evt_fw[fwevt]);
+
+	/* Track previous & current events for debugging */
+	rn->prev_evt = rn->cur_evt;
+	rn->cur_evt = fwevt;
+
+	/* Post event to rnode SM */
+	csio_post_event(&rn->sm, evt);
+
+	/* Free rn if in uninit state */
+	if (csio_is_rnode_uninit(rn))
+		csio_put_rnode(ln, rn);
+}
+
+/*
+ * csio_rnode_init - Initialize rnode.
+ * @rn: RNode
+ * @ln: Associated lnode
+ *
+ * Links the rnode to its lnode, puts the rnode SM in the uninit state,
+ * initializes the host completion queue, invalidates the flowid and
+ * appends the rnode to ln->rnhead.
+ *
+ * Caller is responsible for holding the lock. The lock is required
+ * to be held for inserting the rnode in ln->rnhead list.
+ *
+ * Returns: always 0.
+ */
+static int
+csio_rnode_init(struct csio_rnode *rn, struct csio_lnode *ln)
+{
+	csio_rnode_to_lnode(rn) = ln;
+	csio_init_state(&rn->sm, csio_rns_uninit);
+	INIT_LIST_HEAD(&rn->host_cmpl_q);
+	csio_rn_flowid(rn) = CSIO_INVALID_IDX;
+
+	/* Add rnode to list of lnodes->rnhead */
+	list_add_tail(&rn->sm.sm_list, &ln->rnhead);
+
+	return 0;
+}
+
+/*
+ * csio_rnode_exit - Undo csio_rnode_init().
+ * @rn: RNode
+ *
+ * Unlinks the rnode from the list it is on and asserts (via
+ * CSIO_DB_ASSERT) that no I/Os are left on its host completion queue.
+ */
+static void
+csio_rnode_exit(struct csio_rnode *rn)
+{
+	list_del_init(&rn->sm.sm_list);
+	CSIO_DB_ASSERT(list_empty(&rn->host_cmpl_q));
+}
-- 
1.7.1

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox