Netdev List
 help / color / mirror / Atom feed
* [PATCH 0/6] netfilter updates for net-next
From: pablo @ 2012-12-04 17:31 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev

From: Pablo Neira Ayuso <pablo@netfilter.org>

Hi David,

If time allows, please pull the following 6 updates for your
net-next tree:

* Remove limitation in the maximum number of supported sets in ipset.
  Now ipset automagically increments the number of slots in the array
  of sets by 64 new spare slots, from Jozsef Kadlecsik.

* Partially remove the generic queue infrastructure now that ip_queue
  is gone. Its only client is nfnetlink_queue now, from Florian
  Westphal.

* Add missing attribute policy checkings in ctnetlink, from Florian
  Westphal.

* Automagically kill conntrack entries that use the wrong output
  interface for the masquerading case in case of routing changes,
  from Jozsef Kadlecsik.

* Two patches two improve ct object traceability. Now ct objects are
  always placed in any of the existing lists. This allows us to dump
  the content of unconfirmed and dying conntracks via ctnetlink as
  a way to provide more instrumentation in case you suspect leaks,
  from myself.

You can pull these changes from:

git://1984.lsi.us.es/nf-next master

Thanks!

Florian Westphal (2):
  netfilter: kill support for per-af queue backends
  netfilter: ctnetlink: nla_policy updates

Jozsef Kadlecsik (2):
  netfilter: ipset: Increase the number of maximal sets automatically
  netfilter: nf_nat: Handle routing changes in MASQUERADE target

Pablo Neira Ayuso (2):
  netfilter: nf_conntrack: improve nf_conn object traceability
  netfilter: ctnetlink: dump entries from the dying and unconfirmed lists

 include/net/netfilter/nf_conntrack.h               |    2 +-
 include/net/netfilter/nf_nat.h                     |   15 ++
 include/net/netfilter/nf_queue.h                   |    8 +-
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |    2 +
 net/ipv4/netfilter/iptable_nat.c                   |    4 +
 net/ipv6/netfilter/ip6table_nat.c                  |    4 +
 net/netfilter/core.c                               |    2 -
 net/netfilter/ipset/ip_set_core.c                  |  243 +++++++++++++-------
 net/netfilter/nf_conntrack_core.c                  |   25 +-
 net/netfilter/nf_conntrack_netlink.c               |  118 +++++++++-
 net/netfilter/nf_conntrack_proto_tcp.c             |    2 +
 net/netfilter/nf_queue.c                           |  152 ++----------
 net/netfilter/nfnetlink_queue_core.c               |   14 +-
 13 files changed, 332 insertions(+), 259 deletions(-)

--
1.7.10.4

^ permalink raw reply

* [PATCH 6/6] netfilter: nf_nat: Handle routing changes in MASQUERADE target
From: pablo @ 2012-12-04 17:31 UTC (permalink / raw)
  To: netfilter-devel; +Cc: davem, netdev
In-Reply-To: <1354642293-4114-1-git-send-email-pablo@netfilter.org>

From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>

When the route changes (backup default route, VPNs) which affect a
masqueraded target, the packets were sent out with the outdated source
address. The patch addresses the issue by comparing the outgoing interface
directly with the masqueraded interface in the nat table.

Events are inefficient in this case, because it'd require adding route
events to the network core and then scanning the whole conntrack table
and re-checking the route for all entry.

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_nat.h    |   15 +++++++++++++++
 net/ipv4/netfilter/iptable_nat.c  |    4 ++++
 net/ipv6/netfilter/ip6table_nat.c |    4 ++++
 3 files changed, 23 insertions(+)

diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index bd8eea7..ad14a79 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -68,4 +68,19 @@ static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct)
 #endif
 }
 
+static inline bool nf_nat_oif_changed(unsigned int hooknum,
+				      enum ip_conntrack_info ctinfo,
+				      struct nf_conn_nat *nat,
+				      const struct net_device *out)
+{
+#if IS_ENABLED(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+    IS_ENABLED(CONFIG_IP6_NF_TARGET_MASQUERADE)
+	return nat->masq_index && hooknum == NF_INET_POST_ROUTING &&
+	       CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL &&
+	       nat->masq_index != out->ifindex;
+#else
+	return false;
+#endif
+}
+
 #endif
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index ac635a7..da2c8a3 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -134,6 +134,10 @@ nf_nat_ipv4_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
+			nf_ct_kill_acct(ct, ctinfo, skb);
+			return NF_DROP;
+		}
 	}
 
 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index fa84cf8..6c8ae24 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -137,6 +137,10 @@ nf_nat_ipv6_fn(unsigned int hooknum,
 		/* ESTABLISHED */
 		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
 			     ctinfo == IP_CT_ESTABLISHED_REPLY);
+		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
+			nf_ct_kill_acct(ct, ctinfo, skb);
+			return NF_DROP;
+		}
 	}
 
 	return nf_nat_packet(ct, ctinfo, hooknum, skb);
-- 
1.7.10.4


^ permalink raw reply related

* Re: userspace utils for linux-can
From: Marc Kleine-Budde @ 2012-12-04 17:19 UTC (permalink / raw)
  To: ftpadmin
  Cc: Oliver Hartkopp, Wolfgang Grandegger, David Miller,
	Linux Netdev List
In-Reply-To: <50B4D2B8.8020707@pengutronix.de>

[-- Attachment #1: Type: text/plain, Size: 657 bytes --]

On 11/27/2012 03:48 PM, Marc Kleine-Budde wrote:
> Hello Kernel.org Admins,
> 
> I'd like to put the tarballs of the linux-can userspace testing
> utilities on the kernel.org infrastructure, it this possible? I was
> thinking about somewhere under:
> 
> http://kernel.org/pub/linux/utils/net
> 
> A possible directory name could be "can-utils"

ping

regards,
MArc

-- 
Pengutronix e.K.                  | Marc Kleine-Budde           |
Industrial Linux Solutions        | Phone: +49-231-2826-924     |
Vertretung West/Dortmund          | Fax:   +49-5121-206917-5555 |
Amtsgericht Hildesheim, HRA 2686  | http://www.pengutronix.de   |


[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 261 bytes --]

^ permalink raw reply

* Re: [PATCH] net/macb: Use dmapool to align descriptors on 64bits
From: Nicolas Ferre @ 2012-12-04 17:18 UTC (permalink / raw)
  To: Ben Hutchings
  Cc: David S. Miller, netdev, linux-arm-kernel, linux-kernel,
	Joachim Eastwood, Jean-Christophe PLAGNIOL-VILLARD
In-Reply-To: <1354543290.7030.26.camel@deadeye.wl.decadent.org.uk>

On 12/03/2012 03:01 PM, Ben Hutchings :
> On Mon, 2012-12-03 at 13:15 +0100, Nicolas Ferre wrote:
>> Depending on datapath, some revisions of GEM need
>> 64bits aligned descriptors. Use dmapool to allocate
>> these descriptors.
>> Note that different size between RX and TX rings
>> leads to the creation of two pools.
>>
>> Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
> [...]
> 
> dma_alloc_coherent() allocates whole pages, which I think is quite
> enough alignment.  You can't save memory by doing this, since each pool
> needs at least one page.  So what is this change meant to achieve?

Well... I guess we can forget this patch then ;-)

Thanks, best regards,
-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH v2] net/macb: Use non-coherent memory for rx buffers
From: Nicolas Ferre @ 2012-12-04 17:16 UTC (permalink / raw)
  To: David Laight
  Cc: David S. Miller, netdev, linux-arm-kernel, linux-kernel,
	Joachim Eastwood, Jean-Christophe PLAGNIOL-VILLARD,
	Havard Skinnemoen
In-Reply-To: <AE90C24D6B3A694183C094C60CF0A2F6026B70D6@saturn3.aculab.com>

On 12/03/2012 03:25 PM, David Laight :
>> On 12/03/2012 01:43 PM, David Laight :
>>>> Allocate regular pages to use as backing for the RX ring and use the
>>>> DMA API to sync the caches. This should give a bit better performance
>>>> since it allows the CPU to do burst transfers from memory. It is also
>>>> a necessary step on the way to reduce the amount of copying done by
>>>> the driver.
>>>
>>> I've not tried to understand the patches, but you have to be
>>> very careful using non-snooped memory for descriptor rings.
>>> No amount of DMA API calls can sort out some of the issues.
>>
>> David,
>>
>> Maybe I have not described the patch properly but the non-coherent
>> memory is not used for descriptor rings. It is used for DMA buffers
>> pointed out by descriptors (that are allocated as coherent memory).
>>
>> As buffers are filled up by the interface DMA and then, afterwards, used
>> by the driver to pass data to the net layer, it seems to me that the use
>> of non-coherent memory is sensible.
> 
> Ah, ok - difficult to actually determine from a fast read of the code.
> So you invalidate (I think that is the right term) all the cache lines
> that are part of each rx buffer before giving it back to the MAC unit.
> (Maybe that first time, and just those cache lines that might have been
> written to after reception - I'd worry about whether the CRC is written
> into the rx buffer!)

If I understand well, you mean that the call to:

		dma_sync_single_range_for_device(&bp->pdev->dev, phys,
				pg_offset, frag_len, DMA_FROM_DEVICE);

in the rx path after having copied the data to skb is not needed?
That is also the conclusion that I found after having thinking about
this again... I will check this.

For the CRC, my driver is not using the CRC offloading feature for the
moment. So no CRC is written by the device.

> I was wondering if the code needs to do per page allocations?
> Perhaps that is necessary to avoid needing a large block of
> contiguous physical memory (and virtual addresses)?

The page management seems interesting for future management of RX
buffers as skb fragments: that will allow to avoid copying received data.

> I know from some experiments done many years ago that a data
> copy in the MAC tx and rx path isn't necessarily as bad as
> people may think - especially if it removes complicated
> 'buffer loaning' schemes and/or iommu setup (or bounce
> buffers due to limited hardware memory addressing).
> 
> The rx copy can usually be made to be a 'whole word' copy
> (ie you copy the two bytes of garbage that (mis)align the
> destination MAC address, and some bytes after the CRC.
> With some hardware I believe it is possible for the cache
> controller to do cache-line aligned copies very quickly!
> (Some very new x86 cpus might be doing this for 'rep movsd'.)

Well, on our side, the "memory bus" resource is precious, so I imagine
that even with an optimized copy, limiting the use of this resource
should be better.

> The copy in the rx path is also better for short packets
> the can end up queued for userspace (although a copy in
> the socket code would solve that one.

Sure, some patches by Haavard that I am working on at the moment are
taking care of copying in any cases the first 62 bytes (+2 bytes
alignment) for each packet so that we cover the case of short packets
and headers...

Thanks for your comments, best regards,
-- 
Nicolas Ferre

^ permalink raw reply

* Re: [PATCH] vxlan: Fix error that was resulting in VXLAN MTU size being 10 bytes too large
From: Alexander Duyck @ 2012-12-04 17:12 UTC (permalink / raw)
  To: Naoto MATSUMOTO; +Cc: Joseph Glanville, Stephen Hemminger, David Miller, netdev
In-Reply-To: <20121204094809.9689.C42C3789@sakura.ad.jp>

On 12/03/2012 04:48 PM, Naoto MATSUMOTO wrote:
> 
> Hi all
> 
> Sharing my testlab resut for you ;-)
> 
> A First Look At VXLAN over Infiniband Network On Linux 3.7-rc7
> http://slidesha.re/TsCKWc
> 
> plz enjyoi it.
> 
> --
> Naoto

What was the MTU for the underlying IPoIB devices in your test?  Just
wondering because the MTU on the VXLAN is showing as 65470 which would
imply the IPoIB devices should have a 65520 MTU and I just wanted to
confirm that.

Thanks,

Alex

^ permalink raw reply

* Re: [PATCH net-next v2] bridge: export multicast database via netlink
From: Stephen Hemminger @ 2012-12-04 16:53 UTC (permalink / raw)
  To: Cong Wang
  Cc: netdev, bridge, Herbert Xu, Jesper Dangaard Brouer, Thomas Graf,
	David S. Miller
In-Reply-To: <1354539824-7898-1-git-send-email-amwang@redhat.com>

On Mon,  3 Dec 2012 21:03:43 +0800
Cong Wang <amwang@redhat.com> wrote:

> V2: drop patch 1/2, export ifindex directly
>     Redesign netlink attributes
>     Improve netlink seq check
>     Handle IPv6 addr as well
> 
> TODO: remove debugging printk's
> 
> This patch exports bridge multicast database via netlink
> message type RTM_GETMDB. Similar to fdb, but currently bridge-specific.
> We may need to support modify multicast database too (RTM_{ADD,DEL}MDB).
> 
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Cc: Thomas Graf <tgraf@suug.ch>
> Cc: Jesper Dangaard Brouer <brouer@redhat.com>
> Signed-off-by: Cong Wang <amwang@redhat.com>
>     

Minor nit reported by checkpatch was the messages should be using the api
which provides the most info in the log to identify.

WARNING: Prefer netdev_info(netdev, ... then dev_info(dev, ... then pr_info(...  to printk(KERN_INFO ...
#190: FILE: net/bridge/br_mdb.c:28:
+		printk(KERN_INFO "no router on bridge\n")

There is a set of macro's already for use in bridging code:
                br_info(br, "no router on bridge\n");

^ permalink raw reply

* Re: [PATCH net-next v2] bridge: implement multicast fast leave
From: Stephen Hemminger @ 2012-12-04 16:44 UTC (permalink / raw)
  To: Cong Wang; +Cc: netdev, bridge, Herbert Xu, David S. Miller
In-Reply-To: <1354615000-19660-1-git-send-email-amwang@redhat.com>

On Tue,  4 Dec 2012 17:56:40 +0800
Cong Wang <amwang@redhat.com> wrote:

> V2: make the toggle per-port
> 
> Fast leave allows bridge to immediately stops the multicast
> traffic on the port receives IGMP Leave when IGMP snooping is enabled,
> no timeouts are observed.
> 
> Cc: Herbert Xu <herbert@gondor.apana.org.au>
> Cc: Stephen Hemminger <shemminger@vyatta.com>
> Cc: "David S. Miller" <davem@davemloft.net>
> Signed-off-by: Cong Wang <amwang@redhat.com>

Ok. as is.

A good followup change would be to migrate
these multicast flags into the existing flags bits (see hairpin etc),
and make them manageable via netlink.

^ permalink raw reply

* Re: [RFC PATCH 2/2] tun: fix LSM/SELinux labeling of tun/tap devices
From: Paul Moore @ 2012-12-04 16:18 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, linux-security-module, selinux, mst
In-Reply-To: <7659411.O2Or69Bf6n@jason-thinkpad-t430s>

On Tuesday, December 04, 2012 09:24:43 PM Jason Wang wrote:
> On Monday, December 03, 2012 11:22:29 AM Paul Moore wrote:
> > It may be that I'm misunderstanding TUNSETQUEUE and/or TUNSETIFF.  Can you
> > elaborate as to why they should be different?
> 
> If I understand correctly, before multiqueue patchset, TUNSETIFF is used to:
> 
> 1) Create the tun/tap network device
> 2) For persistent device, re-attach the fd to the network device / socket.
> In this case, we call selinux_tun_dev_attch() to relabel the socket sid (in
> fact also the device's since the socket were persistent also) to the sid of
> process that calls TUNSETIFF.
> 
> So, after the changes of multiqueue, we need try to preserve those policy.
> The interesting part is the introducing of TUNSETQUEUE, it's used to attach
> more file descriptors/sockets to a tun/tap device after at least one file
> descriptor were attached to the tun/tap device through TUNSETIFF. So I
> think maybe we need differ those two ioctls. This patch looks fine for
> TUNSETQUEUE, but for TUNSETIFF, we need relabel the tunsec to the process
> that calling TUNSETIFF for persistent device?

Okay, based on your explanation of TUNSETQUEUE, the steps below are what I 
believe we need to do ... if you disagree speak up quickly please.

A. TUNSETIFF (new, non-persistent device)

[Allocate and initialize the tun_struct LSM state based on the calling 
process, use this state to label the TUN socket.]

1. Call security_tun_dev_create() which authorizes the action.
2. Call security_tun_dev_alloc_security() which allocates the tun_struct LSM 
blob and SELinux sets some internal blob state to record the label of the 
calling process.
3. Call security_tun_dev_attach() which sets the label of the TUN socket to 
match the label stored in the tun_struct LSM blob during A2.  No authorization 
is done at this point since the socket is new/unlabeled.

B. TUNSETIFF (existing, persistent device)

[Relabel the existing tun_struct LSM state based on the calling process, use
this state to label the TUN socket.]

1. Attempt to relabel/reset the tun_struct LSM blob from the currently stored 
value, set during A2, to the label of the current calling process. *** THIS IS 
NOT CURRENTLY DONE IN THE RFC PATCH ***
2. Call security_tun_dev_attach() which sets the label of the TUN socket to
match the label stored in the tun_struct LSM blob during B1. No authorization 
is done at this point since the socket is new/unlabeled.

C. TUNSETQUEUE

[Use the existing tun_struct LSM state to label the new TUN socket.]

1. Call security_tun_dev_attach() which sets the label of the TUN socket to 
match the label stored in the tun_struct LSM blob set during either A2 or B1.  
No authorization is done at this point since the socket is new/unlabeled.

> btw. Current code does allow calling TUNSETQUEUE to a persistent tun/tap
> device with no file attached. It should be a bug and need to be fixed.

Since you wrote that code will you be submitting a patch to fix that problem?

> > One thing that I think we probably should change is the relabelto/from
> > permissions in the function above (selinux_tun_dev_attach()); in the case
> > where the socket does not yet have a label, e.g. 'sksec->sid == 0', we
> > should probably skip the relabel permissions since we want to assign the
> > TUN device label regardless in this case.
> 
> I'm not familiar with the selinux, have a quick glance of the code, looks
> like the label has been initialized to SECINITSID_KERNEL in
> selinux_socket_post_create().

Unless I've missed something in your changes, the multiqueue code never calls 
any socket code which ends up calling {security,selinux}_socket_post_create(); 
I believe you only call sk_alloc() which ends up calling 
{security,selinux}_sk_alloc() which sets SECINITSID_UNLABELED (I mistakenly 
wrote 0 instead in my earlier email which is techincally SECSID_NULL).  Either 
way, I still think the logic I originally described above is correct.

-- 
paul moore
security and virtualization @ redhat


^ permalink raw reply

* Re: [RFT PATCH] 8139cp: properly support change of MTU values [v2]
From: David Woodhouse @ 2012-12-04 15:44 UTC (permalink / raw)
  To: Francois Romieu; +Cc: jogreene, David Miller, netdev
In-Reply-To: <20121203204608.GA9815@electric-eye.fr.zoreil.com>

[-- Attachment #1: Type: text/plain, Size: 934 bytes --]

On Mon, 2012-12-03 at 21:46 +0100, Francois Romieu wrote:
> David Miller <davem@davemloft.net> :
> [...]
> > I've applied this to net-next, if it triggers any problems we have
> > some time to work it out before 3.8 is released.
> 
> I have bounced the messages to David Woodhouse since he authored the
> last 8139cp changes in net-next and owns the hardware to notice
> regressions.

Thanks. 

This almost works. The patch itself is fine, but the device can't
receive packets larger than 2266 bytes (ping -s 2238). After that, I get
rx_fifo errors. I think the RX FIFO is only 2KiB on the 8139cp, isn't
it? So after that it's dependent on how fast it can shovel it out across
the PCI bus. Which is "not fast" in this case.

Transmit appears to be fine.

-- 
David Woodhouse                            Open Source Technology Centre
David.Woodhouse@intel.com                              Intel Corporation




[-- Attachment #2: smime.p7s --]
[-- Type: application/x-pkcs7-signature, Size: 6171 bytes --]

^ permalink raw reply

* Re: [RFC PATCH 2/2] tun: fix LSM/SELinux labeling of tun/tap devices
From: Michael S. Tsirkin @ 2012-12-04 15:24 UTC (permalink / raw)
  To: Jason Wang; +Cc: Paul Moore, netdev, linux-security-module, selinux
In-Reply-To: <7659411.O2Or69Bf6n@jason-thinkpad-t430s>

On Tue, Dec 04, 2012 at 09:24:43PM +0800, Jason Wang wrote:
> On Monday, December 03, 2012 11:22:29 AM Paul Moore wrote:
> > On Monday, December 03, 2012 06:15:42 PM Jason Wang wrote:
> > > On 11/30/2012 06:06 AM, Paul Moore wrote:
> > > > This patch corrects some problems with LSM/SELinux that were introduced
> > > > with the multiqueue patchset.  The problem stems from the fact that the
> > > > multiqueue work changed the relationship between the tun device and its
> > > > associated socket; before the socket persisted for the life of the
> > > > device, however after the multiqueue changes the socket only persisted
> > > > for the life of the userspace connection (fd open).  For non-persistent
> > > > devices this is not an issue, but for persistent devices this can cause
> > > > the tun device to lose its SELinux label.
> > > > 
> > > > We correct this problem by adding an opaque LSM security blob to the
> > > > tun device struct which allows us to have the LSM security state, e.g.
> > > > SELinux labeling information, persist for the lifetime of the tun
> > > > device.
> > 
> > ...
> > 
> > > > -static int selinux_tun_dev_attach(struct sock *sk)
> > > > +static int selinux_tun_dev_attach(struct sock *sk, void *security)
> > > > 
> > > >  {
> > > > 
> > > > +	struct tun_security_struct *tunsec = security;
> > > > 
> > > >  	struct sk_security_struct *sksec = sk->sk_security;
> > > >  	u32 sid = current_sid();
> > > >  	int err;
> > > > 
> > > > +	/* we don't currently perform any NetLabel based labeling here ...
> > > > 
> > > >  	err = avc_has_perm(sid, sksec->sid, SECCLASS_TUN_SOCKET,
> > > >  	
> > > >  			   TUN_SOCKET__RELABELFROM, NULL);
> > > >  	
> > > >  	if (err)
> > > >  	
> > > >  		return err;
> > > > 
> > > > -	err = avc_has_perm(sid, sid, SECCLASS_TUN_SOCKET,
> > > > +	err = avc_has_perm(sid, tunsec->sid, SECCLASS_TUN_SOCKET,
> > > > 
> > > >  			   TUN_SOCKET__RELABELTO, NULL);
> > > >  	
> > > >  	if (err)
> > > >  	
> > > >  		return err;
> > > > 
> > > > -	sksec->sid = sid;
> > > > +	sksec->sid = tunsec->sid;
> > > > +	sksec->sclass = SECCLASS_TUN_SOCKET;
> > > 
> > > I'm not sure whether this is correct, looks like we need to differ between
> > > TUNSETQUEUE and TUNSETIFF. When userspace call TUNSETIFF for persistent
> > > device, looks like we need change the sid of tunsec like in the past.
> > 
> > It may be that I'm misunderstanding TUNSETQUEUE and/or TUNSETIFF.  Can you
> > elaborate as to why they should be different?
> 
> If I understand correctly, before multiqueue patchset, TUNSETIFF is used to:
> 
> 1) Create the tun/tap network device
> 2) For persistent device, re-attach the fd to the network device / socket. In 
> this case, we call selinux_tun_dev_attch() to relabel the socket sid (in fact 
> also the device's since the socket were persistent also) to the sid of process 
> that calls TUNSETIFF.
> 
> So, after the changes of multiqueue, we need try to preserve those policy. The 
> interesting part is the introducing of TUNSETQUEUE, it's used to attach more 
> file descriptors/sockets to a tun/tap device after at least one file descriptor 
> were attached to the tun/tap device through TUNSETIFF. So I think maybe we 
> need differ those two ioctls. This patch looks fine for TUNSETQUEUE, but for 
> TUNSETIFF, we need relabel the tunsec to the process that calling TUNSETIFF 
> for persistent device?

Basically, it looks like currently once you get a tun fd,
you can attach it to any device even if normally
selinux would prevent you from accessing it.

If we reuse selinux_tun_dev_attach, we won't need to
change selinux policy, with a new capability we will need to change it
to allow libvirt to do TUNSETQUEUE.


> 
> btw. Current code does allow calling TUNSETQUEUE to a persistent tun/tap 
> device with no file attached. It should be a bug and need to be fixed.

Is this a problem? You can always
attach
set queue
detach

and it would be hard to prevent this ...

> > 
> > One thing that I think we probably should change is the relabelto/from
> > permissions in the function above (selinux_tun_dev_attach()); in the case
> > where the socket does not yet have a label, e.g. 'sksec->sid == 0', we
> > should probably skip the relabel permissions since we want to assign the
> > TUN device label regardless in this case.
> 
> I'm not familiar with the selinux, have a quick glance of the code, looks like 
> the label has been initialized to SECINITSID_KERNEL in 
> selinux_socket_post_create().
> 
> Thanks

^ permalink raw reply

* Re: [PATCH net-next 2/3] virtio_net: multiqueue support
From: Michael S. Tsirkin @ 2012-12-04 15:11 UTC (permalink / raw)
  To: Jason Wang
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, davem, shiyer
In-Reply-To: <7748384.YyJVzCCbsc@jason-thinkpad-t430s>

On Tue, Dec 04, 2012 at 10:45:33PM +0800, Jason Wang wrote:
> On Tuesday, December 04, 2012 03:24:22 PM Michael S. Tsirkin wrote:
> > I found some bugs, see below.
> > Also some style nitpicking, this is not mandatory to address.
> 
> Thanks for the reviewing.
> > 
> > On Tue, Dec 04, 2012 at 07:07:57PM +0800, Jason Wang wrote:
> > > This addes multiqueue support to virtio_net driver. In multiple queue
> > > modes, the driver expects the number of queue paris is equal to the
> > > number of vcpus. To eliminate the contention bettwen vcpus and
> > > virtqueues, per-cpu virtqueue pairs
> > > were implemented through:
> > Lots of typos above - try running ispell on it :)
> > 
> 
> Sure.
> > > - select the txq based on the smp processor id.
> > > - smp affinity hint were set to the vcpu that owns the queue pairs.
> > > 
> > > Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> > > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > > ---
> > > 
> > >  drivers/net/virtio_net.c        |  472
> > >  ++++++++++++++++++++++++++++++--------- include/uapi/linux/virtio_net.h
> > >  |   16 ++
> > >  2 files changed, 385 insertions(+), 103 deletions(-)
> > > 
> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > > index 266f712..912f5b2 100644
> > > --- a/drivers/net/virtio_net.c
> > > +++ b/drivers/net/virtio_net.c
> > > @@ -81,16 +81,25 @@ struct virtnet_info {
> > > 
> > >  	struct virtio_device *vdev;
> > >  	struct virtqueue *cvq;
> > >  	struct net_device *dev;
> > > 
> > > -	struct send_queue sq;
> > > -	struct receive_queue rq;
> > > +	struct send_queue *sq;
> > > +	struct receive_queue *rq;
> > > 
> > >  	unsigned int status;
> > > 
> > > +	/* Max # of queue pairs supported by the device */
> > > +	u16 max_queue_pairs;
> > > +
> > > +	/* # of queue pairs currently used by the driver */
> > > +	u16 curr_queue_pairs;
> > > +
> > > 
> > >  	/* I like... big packets and I cannot lie! */
> > >  	bool big_packets;
> > >  	
> > >  	/* Host will merge rx buffers for big packets (shake it! shake it!) */
> > >  	bool mergeable_rx_bufs;
> > > 
> > > +	/* Has control virtqueue */
> > > +	bool has_cvq;
> > > +
> > > 
> > >  	/* enable config space updates */
> > >  	bool config_enable;
> > > 
> > > @@ -125,6 +134,32 @@ struct padded_vnet_hdr {
> > > 
> > >  	char padding[6];
> > >  
> > >  };
> > > 
> > > +static const struct ethtool_ops virtnet_ethtool_ops;
> > > +
> > > +
> > > +/* Converting between virtqueue no. and kernel tx/rx queue no.
> > > + * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
> > > + */
> > > +static int vq2txq(struct virtqueue *vq)
> > > +{
> > > +	return (virtqueue_get_queue_index(vq) - 1) / 2;
> > > +}
> > > +
> > > +static int txq2vq(int txq)
> > > +{
> > > +	return txq * 2 + 1;
> > > +}
> > > +
> > > +static int vq2rxq(struct virtqueue *vq)
> > > +{
> > > +	return virtqueue_get_queue_index(vq) / 2;
> > > +}
> > > +
> > > +static int rxq2vq(int rxq)
> > > +{
> > > +	return rxq * 2;
> > > +}
> > > +
> > > 
> > >  static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
> > >  {
> > >  
> > >  	return (struct skb_vnet_hdr *)skb->cb;
> > > 
> > > @@ -165,7 +200,7 @@ static void skb_xmit_done(struct virtqueue *vq)
> > > 
> > >  	virtqueue_disable_cb(vq);
> > >  	
> > >  	/* We were probably waiting for more output buffers. */
> > > 
> > > -	netif_wake_queue(vi->dev);
> > > +	netif_wake_subqueue(vi->dev, vq2txq(vq));
> > > 
> > >  }
> > >  
> > >  static void set_skb_frag(struct sk_buff *skb, struct page *page,
> > > 
> > > @@ -502,7 +537,7 @@ static bool try_fill_recv(struct receive_queue *rq,
> > > gfp_t gfp)> 
> > >  static void skb_recv_done(struct virtqueue *rvq)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = rvq->vdev->priv;
> > > 
> > > -	struct receive_queue *rq = &vi->rq;
> > > +	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
> > > 
> > >  	/* Schedule NAPI, Suppress further interrupts if successful. */
> > >  	if (napi_schedule_prep(&rq->napi)) {
> > > 
> > > @@ -532,15 +567,21 @@ static void refill_work(struct work_struct *work)
> > > 
> > >  	struct virtnet_info *vi =
> > >  	
> > >  		container_of(work, struct virtnet_info, refill.work);
> > >  	
> > >  	bool still_empty;
> > > 
> > > +	int i;
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		struct receive_queue *rq = &vi->rq[i];
> > > 
> > > -	napi_disable(&vi->rq.napi);
> > > -	still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL);
> > > -	virtnet_napi_enable(&vi->rq);
> > > +		napi_disable(&rq->napi);
> > > +		still_empty = !try_fill_recv(rq, GFP_KERNEL);
> > > +		virtnet_napi_enable(rq);
> > > 
> > > -	/* In theory, this can happen: if we don't get any buffers in
> > > -	 * we will *never* try to fill again. */
> > > -	if (still_empty)
> > > -		schedule_delayed_work(&vi->refill, HZ/2);
> > > +		/* In theory, this can happen: if we don't get any buffers in
> > > +		 * we will *never* try to fill again.
> > > +		 */
> > > +		if (still_empty)
> > > +			schedule_delayed_work(&vi->refill, HZ/2);
> > > +	}
> > > 
> > >  }
> > >  
> > >  static int virtnet_poll(struct napi_struct *napi, int budget)
> > > 
> > > @@ -650,7 +691,8 @@ static int xmit_skb(struct send_queue *sq, struct
> > > sk_buff *skb)> 
> > >  static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device
> > >  *dev)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = netdev_priv(dev);
> > > 
> > > -	struct send_queue *sq = &vi->sq;
> > > +	int qnum = skb_get_queue_mapping(skb);
> > > +	struct send_queue *sq = &vi->sq[qnum];
> > > 
> > >  	int capacity;
> > >  	
> > >  	/* Free up any pending old buffers before queueing new ones. */
> > > 
> > > @@ -664,13 +706,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb,
> > > struct net_device *dev)> 
> > >  		if (likely(capacity == -ENOMEM)) {
> > >  		
> > >  			if (net_ratelimit())
> > >  			
> > >  				dev_warn(&dev->dev,
> > > 
> > > -					 "TX queue failure: out of memory\n");
> > > +					 "TXQ (%d) failure: out of memory\n",
> > > +					 qnum);
> > > 
> > >  		} else {
> > >  		
> > >  			dev->stats.tx_fifo_errors++;
> > >  			if (net_ratelimit())
> > >  			
> > >  				dev_warn(&dev->dev,
> > > 
> > > -					 "Unexpected TX queue failure: %d\n",
> > > -					 capacity);
> > > +					 "Unexpected TXQ (%d) failure: %d\n",
> > > +					 qnum, capacity);
> > > 
> > >  		}
> > >  		dev->stats.tx_dropped++;
> > >  		kfree_skb(skb);
> > > 
> > > @@ -685,12 +728,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb,
> > > struct net_device *dev)> 
> > >  	/* Apparently nice girls don't return TX_BUSY; stop the queue
> > >  	
> > >  	 * before it gets out of hand.  Naturally, this wastes entries. */
> > >  	
> > >  	if (capacity < 2+MAX_SKB_FRAGS) {
> > > 
> > > -		netif_stop_queue(dev);
> > > +		netif_stop_subqueue(dev, qnum);
> > > 
> > >  		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> > >  		
> > >  			/* More just got used, free them then recheck. */
> > >  			capacity += free_old_xmit_skbs(sq);
> > >  			if (capacity >= 2+MAX_SKB_FRAGS) {
> > > 
> > > -				netif_start_queue(dev);
> > > +				netif_start_subqueue(dev, qnum);
> > > 
> > >  				virtqueue_disable_cb(sq->vq);
> > >  			
> > >  			}
> > >  		
> > >  		}
> > > 
> > > @@ -758,23 +801,13 @@ static struct rtnl_link_stats64
> > > *virtnet_stats(struct net_device *dev,> 
> > >  static void virtnet_netpoll(struct net_device *dev)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = netdev_priv(dev);
> > > 
> > > +	int i;
> > > 
> > > -	napi_schedule(&vi->rq.napi);
> > > +	for (i = 0; i < vi->curr_queue_pairs; i++)
> > > +		napi_schedule(&vi->rq[i].napi);
> > > 
> > >  }
> > >  #endif
> > > 
> > > -static int virtnet_open(struct net_device *dev)
> > > -{
> > > -	struct virtnet_info *vi = netdev_priv(dev);
> > > -
> > > -	/* Make sure we have some buffers: if oom use wq. */
> > > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> > > -		schedule_delayed_work(&vi->refill, 0);
> > > -
> > > -	virtnet_napi_enable(&vi->rq);
> > > -	return 0;
> > > -}
> > > -
> > > 
> > >  /*
> > >  
> > >   * Send command via the control virtqueue and check status.  Commands
> > >   * supported by the hypervisor, as indicated by feature bits, should
> > > 
> > > @@ -830,13 +863,51 @@ static void virtnet_ack_link_announce(struct
> > > virtnet_info *vi)> 
> > >  	rtnl_unlock();
> > >  
> > >  }
> > > 
> > > +/* Caller check the support of cvq and multiqueue. */
> > > +static int virtnet_set_queues(struct virtnet_info *vi)
> > > +{
> > > +	struct scatterlist sg;
> > > +	struct virtio_net_ctrl_rfs s;
> > > +	struct net_device *dev = vi->dev;
> > > +
> > > +	s.virtqueue_pairs = vi->curr_queue_pairs;
> > > +	sg_init_one(&sg, &s, sizeof(s));
> > > +
> > > +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RFS,
> > > +				  VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET, &sg, 1, 0)){
> > > +		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
> > > +			 vi->curr_queue_pairs);
> > > +		return -EINVAL;
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int virtnet_open(struct net_device *dev)
> > 
> > Why move this here? diff will be smaller if you don't.
> 
> Ok, will move it back.
> > 
> > > +{
> > > +	struct virtnet_info *vi = netdev_priv(dev);
> > > +	int i;
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		/* Make sure we have some buffers: if oom use wq. */
> > > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> > > +			schedule_delayed_work(&vi->refill, 0);
> > > +		virtnet_napi_enable(&vi->rq[i]);
> > > +	}
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > 
> > >  static int virtnet_close(struct net_device *dev)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = netdev_priv(dev);
> > > 
> > > +	int i;
> > > 
> > >  	/* Make sure refill_work doesn't re-enable napi! */
> > >  	cancel_delayed_work_sync(&vi->refill);
> > > 
> > > -	napi_disable(&vi->rq.napi);
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++)
> > > +		napi_disable(&vi->rq[i].napi);
> > > 
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > @@ -948,8 +1019,8 @@ static void virtnet_get_ringparam(struct net_device
> > > *dev,> 
> > >  {
> > >  
> > >  	struct virtnet_info *vi = netdev_priv(dev);
> > > 
> > > -	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
> > > -	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
> > > +	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
> > > +	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
> > > 
> > >  	ring->rx_pending = ring->rx_max_pending;
> > >  	ring->tx_pending = ring->tx_max_pending;
> > >  
> > >  }
> > > 
> > > @@ -967,12 +1038,6 @@ static void virtnet_get_drvinfo(struct net_device
> > > *dev,> 
> > >  }
> > > 
> > > -static const struct ethtool_ops virtnet_ethtool_ops = {
> > > -	.get_drvinfo = virtnet_get_drvinfo,
> > > -	.get_link = ethtool_op_get_link,
> > > -	.get_ringparam = virtnet_get_ringparam,
> > > -};
> > > -
> > > 
> > >  #define MIN_MTU 68
> > >  #define MAX_MTU 65535
> > > 
> > > @@ -984,6 +1049,21 @@ static int virtnet_change_mtu(struct net_device
> > > *dev, int new_mtu)> 
> > >  	return 0;
> > >  
> > >  }
> > > 
> > > +/* To avoid contending a lock hold by a vcpu who would exit to host,
> > > select the + * txq based on the processor id.
> > > + * TODO: handle cpu hotplug.
> > > + */
> > > +static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff
> > > *skb) +{
> > > +	int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
> > > +		  smp_processor_id();
> > > +
> > > +	while (unlikely(txq >= dev->real_num_tx_queues))
> > > +		txq -= dev->real_num_tx_queues;
> > > +
> > > +	return txq;
> > > +}
> > > +
> > > 
> > >  static const struct net_device_ops virtnet_netdev = {
> > >  
> > >  	.ndo_open            = virtnet_open,
> > >  	.ndo_stop   	     = virtnet_close,
> > > 
> > > @@ -995,6 +1075,7 @@ static const struct net_device_ops virtnet_netdev = {
> > > 
> > >  	.ndo_get_stats64     = virtnet_stats,
> > >  	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
> > >  	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
> > > 
> > > +	.ndo_select_queue     = virtnet_select_queue,
> > > 
> > >  #ifdef CONFIG_NET_POLL_CONTROLLER
> > >  
> > >  	.ndo_poll_controller = virtnet_netpoll,
> > >  
> > >  #endif
> > > 
> > > @@ -1030,10 +1111,10 @@ static void virtnet_config_changed_work(struct
> > > work_struct *work)> 
> > >  	if (vi->status & VIRTIO_NET_S_LINK_UP) {
> > >  	
> > >  		netif_carrier_on(vi->dev);
> > > 
> > > -		netif_wake_queue(vi->dev);
> > > +		netif_tx_wake_all_queues(vi->dev);
> > > 
> > >  	} else {
> > >  	
> > >  		netif_carrier_off(vi->dev);
> > > 
> > > -		netif_stop_queue(vi->dev);
> > > +		netif_tx_stop_all_queues(vi->dev);
> > > 
> > >  	}
> > >  
> > >  done:
> > >  	mutex_unlock(&vi->config_lock);
> > > 
> > > @@ -1046,41 +1127,219 @@ static void virtnet_config_changed(struct
> > > virtio_device *vdev)> 
> > >  	schedule_work(&vi->config_work);
> > >  
> > >  }
> > > 
> > > -static int init_vqs(struct virtnet_info *vi)
> > > +static void free_receive_bufs(struct virtnet_info *vi)
> > > 
> > >  {
> > > 
> > > -	struct virtqueue *vqs[3];
> > > -	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
> > > -	const char *names[] = { "input", "output", "control" };
> > > -	int nvqs, err;
> > > +	int i;
> > > 
> > > -	/* We expect two virtqueues, receive then send,
> > > -	 * and optionally control. */
> > > -	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		while (vi->rq[i].pages)
> > > +			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
> > > +	}
> > > +}
> > > 
> > > -	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
> > > -	if (err)
> > > -		return err;
> > > +/* Free memory allocated for send and receive queues */
> > > +static void virtnet_free_queues(struct virtnet_info *vi)
> > 
> > I think it's cleaner to open-code this, this way
> > during error handling you can have:
> > 
> > 	kfree(vi->rq);
> > err_rq:
> > 	kfree(vi->sq);
> > err_sq:
> > 
> > without tricks like malloc them both then goto end.
> 
> Ok.
> > 
> > > +{
> > > +	kfree(vi->rq);
> > > +	vi->rq = NULL;
> > > +	kfree(vi->sq);
> > > +	vi->sq = NULL;
> > 
> > I think = NULL is not needed - we never call this twice.
> 
> Sure, will remove it.
> > 
> > > +}
> > > +
> > > +static void free_unused_bufs(struct virtnet_info *vi)
> > > +{
> > > +	void *buf;
> > > +	int i;
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		struct virtqueue *vq = vi->sq[i].vq;
> > > +		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
> > > +			dev_kfree_skb(buf);
> > > +	}
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		struct virtqueue *vq = vi->rq[i].vq;
> > > +
> > > +		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
> > > +			if (vi->mergeable_rx_bufs || vi->big_packets)
> > > +				give_pages(&vi->rq[i], buf);
> > > +			else
> > > +				dev_kfree_skb(buf);
> > > +			--vi->rq[i].num;
> > > +		}
> > > +		BUG_ON(vi->rq[i].num != 0);
> > > +	}
> > > +}
> > > +
> > > +static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
> > > +{
> > > +	int i;
> > > +
> > > +	/* Don't set the affinity hint when in single queue mode or we have too
> > > +	 * much online cpus.
> > > +	 */
> > 
> > Pls remove this comment, or replace with one explaining
> > the motivation for this logic.
> 
> Will replace with the motivation.
> > 
> > > +	if (vi->curr_queue_pairs == 1 ||
> > > +	    vi->max_queue_pairs > num_online_cpus())
> > 
> > If we have less it's not a good idea either, is it?
> > So check vi->max_queue_pairs != num_online_cpus().
> 
> Ok.
> > 
> > > +		set = false;
> > 
> > This will overwrite affinity if it was set by userspace.
> > Just
> > 	if (set)
> > 		return;
> > will not have this problem.
> 
> But we need handle the situtaiton when switch back to sq from mq mode. 
> Otherwise we may still get the affinity hint used in mq.
>  This kind of overwrite 
> is unavoidable or is there some method to detect whether userspac write 
> something new?

If we didn't set the affinity originally we should not overwrite it.
I think this means we need a flag that tells us that
virtio set the affinity.

> > 
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		int cpu = set ? i : -1;
> > > +		virtqueue_set_affinity(vi->rq[i].vq, cpu);
> > > +		virtqueue_set_affinity(vi->sq[i].vq, cpu);
> > > +	}
> > > +}
> > > +
> > > +static void virtnet_del_vqs(struct virtnet_info *vi)
> > 
> > It might be a good idea to add this function in previous
> > patch.
> 
> Yes, good suggetion.
> > 
> > > +{
> > > +	struct virtio_device *vdev = vi->vdev;
> > > +
> > > +	virtnet_set_affinity(vi, false);
> > > +
> > > +	vdev->config->del_vqs(vdev);
> > > 
> > > -	vi->rq.vq = vqs[0];
> > > -	vi->sq.vq = vqs[1];
> > > +	virtnet_free_queues(vi);
> > > +}
> > > 
> > > -	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
> > > -		vi->cvq = vqs[2];
> > > +static int virtnet_find_vqs(struct virtnet_info *vi)
> > > +{
> > > +	vq_callback_t **callbacks;
> > > +	struct virtqueue **vqs;
> > > +	int ret = -ENOMEM;
> > > +	int i, total_vqs;
> > > +	char **names;
> > > +
> > > +	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followd by
> > 
> > followed
> > 
> 
> Ok, will correct it.
> > > +	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
> > > +	 * possible control vq.
> > > +	 */
> > > +	total_vqs = vi->max_queue_pairs * 2 +
> > > +		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
> > > +
> > > +	/* Allocate space for find_vqs parameters */
> > > +	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
> > > +	callbacks = kzalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
> > > +	if (!vqs || !callbacks)
> > > +		goto err_mem;
> > > +	names = kzalloc(total_vqs * sizeof(*names), GFP_KERNEL);
> > 
> > Why kzalloc? You seem to fill in all of it. Pls just use kmalloc
> > and initialize fields.
> > 
> 
> Right.
> > > +	if (!names)
> > > +		goto err_mem;
> > 
> > Since you have separate goto here it's more consistent
> > to use a separate label and two if tests above.
> > 
> 
> Sure.
> > > +
> > > +	/* Parameters for control virtqueue, if any */
> > > +	if (vi->has_cvq) {
> > > +		callbacks[total_vqs - 1] = NULL;
> > > +		names[total_vqs - 1] = kasprintf(GFP_KERNEL, "control");
> > > +	}
> > > 
> > > +	/* Allocate/initialize parameters for send/receive virtqueues */
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		callbacks[rxq2vq(i)] = skb_recv_done;
> > > +		callbacks[txq2vq(i)] = skb_xmit_done;
> > > +		names[rxq2vq(i)] = kasprintf(GFP_KERNEL, "input.%d", i);
> > > +		names[txq2vq(i)] = kasprintf(GFP_KERNEL, "output.%d", i);
> > > +	}
> > 
> > We would need to check kasprintf return value.
> 
> Looks like a better method is to make the name as a memeber of receive_queue 
> and send_queue, and use sprintf here.
> > Also if you allocate names from slab we'll need to free them
> > later.
> 
> Then it could be freed when the send_queue and receive_queue is freed.
> > It's probably easier to just use fixed names for now -
> > it's not like the index is really useful.
> 
> Looks useful for debugging e.g. check whether the irq distribution is as 
> expected.

Well it doesn't really matter which one goes where, right?
As long as interrupts are distributed well.

> > 
> > > +
> > > +	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
> > > +					 (const char **)names);
> > 
> > Please avoid casts, use a proper type for names.
> 
> I'm consider we need a minor change in this api, we need allocate the names 
> dynamically which could not be a const char **.

I don't see why. Any use that allocates on the fly as
you did would leak memory. Any use like you suggest
e.g. allocating as part of send/receive structure
would be fine.

> > 
> > > +	if (ret)
> > > +		goto err_names;
> > > +
> > > +	if (vi->has_cvq) {
> > > +		vi->cvq = vqs[total_vqs - 1];
> > > 
> > >  		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
> > >  		
> > >  			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
> > >  	
> > >  	}
> > > 
> > > +
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		vi->rq[i].vq = vqs[rxq2vq(i)];
> > > +		vi->sq[i].vq = vqs[txq2vq(i)];
> > > +	}
> > > +
> > > +	kfree(callbacks);
> > > +	kfree(vqs);
> > 
> > Who frees names if there's no error?
> > 
> 
> The virtio core does not copy the name, so it need this and only used for 
> debugging if I'm reading the code correctly.

No, virtio core does not free either individual vq name or the names
array passed in. So this leaks memory.

> > > +
> > > +	return 0;
> > > +
> > > +err_names:
> > > +	for (i = 0; i < total_vqs * 2; i++)
> > 
> > Why * 2?
> > This looks like a bug.
> 
> Yes, it's a bug.
> > 
> > > +		kfree(names[i]);
> > > +	kfree(names);
> > > +
> > > +err_mem:
> > > +	kfree(callbacks);
> > > +	kfree(vqs);
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static int virtnet_alloc_queues(struct virtnet_info *vi)
> > > +{
> > > +	int i;
> > > +
> > > +	vi->sq = kzalloc(sizeof(vi->sq[0]) * vi->max_queue_pairs, GFP_KERNEL);
> > > +	vi->rq = kzalloc(sizeof(vi->rq[0]) * vi->max_queue_pairs, GFP_KERNEL);
> > 
> > While equivalent, *vi->rq is clearer IMHO.
> 
> Ok.
> > 
> > > +	if (!vi->rq || !vi->sq)
> > > +		goto err;
> > > +
> > > +	INIT_DELAYED_WORK(&vi->refill, refill_work);
> > > +	/* setup initial receive and send queue parameters */
> > 
> > Pls remove this comment, it's confuses more than it clarifies.
> 
> Sure.
> > 
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		vi->rq[i].pages = NULL;
> > > +		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
> > > +			       napi_weight);
> > > +
> > > +		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
> > > +		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
> > > +	}
> > > +
> > > +
> > 
> > Extra empty line.
> > 
> 
> Will remove this line.
> > > +	return 0;
> > > +
> > > +err:
> > > +	virtnet_free_queues(vi);
> > > +	return -ENOMEM;
> > > +}
> > > +
> > > +static int init_vqs(struct virtnet_info *vi)
> > > +{
> > > +	int ret;
> > > +
> > > +	/* Allocate send & receive queues */
> > > +	ret = virtnet_alloc_queues(vi);
> > > +	if (ret)
> > > +		goto err;
> > > +
> > > +	ret = virtnet_find_vqs(vi);
> > > +	if (ret)
> > > +		goto err_free;
> > > +
> > > +	virtnet_set_affinity(vi, true);
> > > 
> > >  	return 0;
> > > 
> > > +
> > > +err_free:
> > > +	virtnet_free_queues(vi);
> > > +err:
> > > +	return ret;
> > > 
> > >  }
> > >  
> > >  static int virtnet_probe(struct virtio_device *vdev)
> > >  {
> > > 
> > > -	int err;
> > > +	int i, err;
> > > 
> > >  	struct net_device *dev;
> > >  	struct virtnet_info *vi;
> > > 
> > > +	u16 max_queue_pairs;
> > > +
> > > +	/* Find if host supports multiqueue virtio_net device */
> > > +	err = virtio_config_val(vdev, VIRTIO_NET_F_RFS,
> > > +				offsetof(struct virtio_net_config,
> > > +				max_virtqueue_pairs), &max_queue_pairs);
> > > +
> > > +	/* We need at least 2 queue's */
> > > +	if (err || max_queue_pairs < VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN ||
> > > +	    max_queue_pairs > VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX)
> > 
> > Check has_cvq as well.
> > 
> 
> Sure.
> > > +		max_queue_pairs = 1;
> > > 
> > >  	/* Allocate ourselves a network device with room for our info */
> > > 
> > > -	dev = alloc_etherdev(sizeof(struct virtnet_info));
> > > +	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
> > > 
> > >  	if (!dev)
> > >  	
> > >  		return -ENOMEM;
> > > 
> > > @@ -1127,22 +1386,17 @@ static int virtnet_probe(struct virtio_device
> > > *vdev)> 
> > >  	/* Set up our device-specific information */
> > >  	vi = netdev_priv(dev);
> > > 
> > > -	netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
> > > 
> > >  	vi->dev = dev;
> > >  	vi->vdev = vdev;
> > >  	vdev->priv = vi;
> > > 
> > > -	vi->rq.pages = NULL;
> > > 
> > >  	vi->stats = alloc_percpu(struct virtnet_stats);
> > >  	err = -ENOMEM;
> > >  	if (vi->stats == NULL)
> > >  	
> > >  		goto free;
> > > 
> > > -	INIT_DELAYED_WORK(&vi->refill, refill_work);
> > > 
> > >  	mutex_init(&vi->config_lock);
> > >  	vi->config_enable = true;
> > >  	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
> > > 
> > > -	sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
> > > -	sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
> > > 
> > >  	/* If we can receive ANY GSO packets, we must allocate large ones. */
> > >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
> > > 
> > > @@ -1153,10 +1407,21 @@ static int virtnet_probe(struct virtio_device
> > > *vdev)> 
> > >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> > >  	
> > >  		vi->mergeable_rx_bufs = true;
> > > 
> > > +	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> > > +		vi->has_cvq = true;
> > > +
> > > +	/* Use single tx/rx queue pair as default */
> > > +	vi->curr_queue_pairs = 1;
> > > +	vi->max_queue_pairs = max_queue_pairs;
> > > +
> > > +	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
> > > 
> > >  	err = init_vqs(vi);
> > >  	if (err)
> > >  	
> > >  		goto free_stats;
> > > 
> > > +	netif_set_real_num_tx_queues(dev, 1);
> > > +	netif_set_real_num_rx_queues(dev, 1);
> > > +
> > > 
> > >  	err = register_netdev(dev);
> > >  	if (err) {
> > >  	
> > >  		pr_debug("virtio_net: registering device failed\n");
> > > 
> > > @@ -1164,12 +1429,15 @@ static int virtnet_probe(struct virtio_device
> > > *vdev)> 
> > >  	}
> > >  	
> > >  	/* Last of all, set up some receive buffers. */
> > > 
> > > -	try_fill_recv(&vi->rq, GFP_KERNEL);
> > > -
> > > -	/* If we didn't even get one input buffer, we're useless. */
> > > -	if (vi->rq.num == 0) {
> > > -		err = -ENOMEM;
> > > -		goto unregister;
> > > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +		try_fill_recv(&vi->rq[i], GFP_KERNEL);
> > > +
> > > +		/* If we didn't even get one input buffer, we're useless. */
> > > +		if (vi->rq[i].num == 0) {
> > > +			free_unused_bufs(vi);
> > > +			err = -ENOMEM;
> > > +			goto free_recv_bufs;
> > > +		}
> > > 
> > >  	}
> > >  	
> > >  	/* Assume link up if device can't report link status,
> > > 
> > > @@ -1182,13 +1450,19 @@ static int virtnet_probe(struct virtio_device
> > > *vdev)> 
> > >  		netif_carrier_on(dev);
> > >  	
> > >  	}
> > > 
> > > -	pr_debug("virtnet: registered device %s\n", dev->name);
> > > +	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
> > > +		 dev->name, max_queue_pairs);
> > > +
> > > 
> > >  	return 0;
> > > 
> > > -unregister:
> > > +free_recv_bufs:
> > > +	free_receive_bufs(vi);
> > > 
> > >  	unregister_netdev(dev);
> > > 
> > > +
> > > 
> > >  free_vqs:
> > > -	vdev->config->del_vqs(vdev);
> > > +	cancel_delayed_work_sync(&vi->refill);
> > > +	virtnet_del_vqs(vi);
> > > +
> > > 
> > >  free_stats:
> > >  	free_percpu(vi->stats);
> > >  
> > >  free:
> > > @@ -1196,28 +1470,6 @@ free:
> > >  	return err;
> > >  
> > >  }
> > > 
> > > -static void free_unused_bufs(struct virtnet_info *vi)
> > > -{
> > > -	void *buf;
> > > -	while (1) {
> > > -		buf = virtqueue_detach_unused_buf(vi->sq.vq);
> > > -		if (!buf)
> > > -			break;
> > > -		dev_kfree_skb(buf);
> > > -	}
> > > -	while (1) {
> > > -		buf = virtqueue_detach_unused_buf(vi->rq.vq);
> > > -		if (!buf)
> > > -			break;
> > > -		if (vi->mergeable_rx_bufs || vi->big_packets)
> > > -			give_pages(&vi->rq, buf);
> > > -		else
> > > -			dev_kfree_skb(buf);
> > > -		--vi->rq.num;
> > > -	}
> > > -	BUG_ON(vi->rq.num != 0);
> > > -}
> > > -
> > > 
> > >  static void remove_vq_common(struct virtnet_info *vi)
> > >  {
> > >  
> > >  	vi->vdev->config->reset(vi->vdev);
> > > 
> > > @@ -1225,10 +1477,9 @@ static void remove_vq_common(struct virtnet_info
> > > *vi)> 
> > >  	/* Free unused buffers in both send and recv, if any. */
> > >  	free_unused_bufs(vi);
> > > 
> > > -	vi->vdev->config->del_vqs(vi->vdev);
> > > +	free_receive_bufs(vi);
> > > 
> > > -	while (vi->rq.pages)
> > > -		__free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
> > > +	virtnet_del_vqs(vi);
> > > 
> > >  }
> > >  
> > >  static void __devexit virtnet_remove(struct virtio_device *vdev)
> > > 
> > > @@ -1254,6 +1505,7 @@ static void __devexit virtnet_remove(struct
> > > virtio_device *vdev)> 
> > >  static int virtnet_freeze(struct virtio_device *vdev)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = vdev->priv;
> > > 
> > > +	int i;
> > > 
> > >  	/* Prevent config work handler from accessing the device */
> > >  	mutex_lock(&vi->config_lock);
> > > 
> > > @@ -1264,7 +1516,10 @@ static int virtnet_freeze(struct virtio_device
> > > *vdev)> 
> > >  	cancel_delayed_work_sync(&vi->refill);
> > >  	
> > >  	if (netif_running(vi->dev))
> > > 
> > > -		napi_disable(&vi->rq.napi);
> > > +		for (i = 0; i < vi->max_queue_pairs; i++) {
> > > +			napi_disable(&vi->rq[i].napi);
> > > +			netif_napi_del(&vi->rq[i].napi);
> > > +		}
> > > 
> > >  	remove_vq_common(vi);
> > > 
> > > @@ -1276,24 +1531,29 @@ static int virtnet_freeze(struct virtio_device
> > > *vdev)> 
> > >  static int virtnet_restore(struct virtio_device *vdev)
> > >  {
> > >  
> > >  	struct virtnet_info *vi = vdev->priv;
> > > 
> > > -	int err;
> > > +	int err, i;
> > > 
> > >  	err = init_vqs(vi);
> > >  	if (err)
> > >  	
> > >  		return err;
> > >  	
> > >  	if (netif_running(vi->dev))
> > > 
> > > -		virtnet_napi_enable(&vi->rq);
> > > +		for (i = 0; i < vi->max_queue_pairs; i++)
> > > +			virtnet_napi_enable(&vi->rq[i]);
> > > 
> > >  	netif_device_attach(vi->dev);
> > > 
> > > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> > > -		schedule_delayed_work(&vi->refill, 0);
> > > +	for (i = 0; i < vi->max_queue_pairs; i++)
> > > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> > > +			schedule_delayed_work(&vi->refill, 0);
> > > 
> > >  	mutex_lock(&vi->config_lock);
> > >  	vi->config_enable = true;
> > >  	mutex_unlock(&vi->config_lock);
> > > 
> > > +	if (vi->has_cvq && virtio_has_feature(vi->vdev, VIRTIO_NET_F_RFS))
> > > +		virtnet_set_queues(vi);
> > > +
> > 
> > I think it's easier to test
> > if (curr_queue_pairs == max_queue_pairs)
> > within virtnet_set_queues and make it
> > a NOP if so.
> 
> Still need to send the command during restore since we reset the device during 
> freezing.


Then maybe check vi->has_cvq && virtio_has_feature(vi->vdev,
VIRTIO_NET_F_RFS) in there?

> > 
> > >  	return 0;
> > >  
> > >  }
> > >  #endif
> > > 
> > > @@ -1311,7 +1571,7 @@ static unsigned int features[] = {
> > > 
> > >  	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
> > >  	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
> > >  	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
> > > 
> > > -	VIRTIO_NET_F_GUEST_ANNOUNCE,
> > > +	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_RFS,
> > > 
> > >  };
> > >  
> > >  static struct virtio_driver virtio_net_driver = {
> > > 
> > > @@ -1329,6 +1589,12 @@ static struct virtio_driver virtio_net_driver = {
> > > 
> > >  #endif
> > >  };
> > > 
> > > +static const struct ethtool_ops virtnet_ethtool_ops = {
> > > +	.get_drvinfo = virtnet_get_drvinfo,
> > > +	.get_link = ethtool_op_get_link,
> > > +	.get_ringparam = virtnet_get_ringparam,
> > > +};
> > > +
> > > 
> > >  static int __init init(void)
> > >  {
> > >  
> > >  	return register_virtio_driver(&virtio_net_driver);
> > > 
> > > diff --git a/include/uapi/linux/virtio_net.h
> > > b/include/uapi/linux/virtio_net.h index 2470f54..6056cec 100644
> > > --- a/include/uapi/linux/virtio_net.h
> > > +++ b/include/uapi/linux/virtio_net.h
> > > @@ -51,6 +51,7 @@
> > > 
> > >  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support 
> */
> > >  #define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on
> > >  the
> > >  
> > >  					 * network */
> > > 
> > > +#define VIRTIO_NET_F_RFS	22	/* Device supports multiple TXQ/RXQ */
> > 
> > Should be
> > /* Device supports Receive Flow Steering. */
> 
> Ok.
> > 
> > >  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> > >  #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
> > > 
> > > @@ -60,6 +61,8 @@ struct virtio_net_config {
> > > 
> > >  	__u8 mac[6];
> > >  	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
> > >  	__u16 status;
> > > 
> > > +	/* Total number of RX/TX queues */
> > 
> > Better comment:
> > /* Maximum number of each of transmit and receive queues;
> >  * see VIRTIO_NET_F_RFS and VIRTIO_NET_CTRL_RFS.
> >  * Legal values are between 1 and 0x8000
> >  */
> 
> Sure.
> > 
> > > +	__u16 max_virtqueue_pairs;
> > > 
> > >  } __attribute__((packed));
> > >  
> > >  /* This is the first element of the scatter-gather list.  If you don't
> > > 
> > > @@ -166,4 +169,17 @@ struct virtio_net_ctrl_mac {
> > > 
> > >  #define VIRTIO_NET_CTRL_ANNOUNCE       3
> > >  
> > >   #define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
> > > 
> > > +/*
> > > + * Control multiqueue
> > 
> > Here's a better comment:
> > 
> > Control Receive Flow Steering
> > 
> >  The command VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET
> >  enables Receive Flow Steering, specifying the number of the transmit and
> > receive queues that will be used.
> >  After the command is consumed and acked by the device,
> >  the device will not steer new packets on receive virtqueues
> >  other than specified nor read from transmit virtqueues other than
> > specified. Accordingly, driver should not transmit new packets
> >  on virtqueues other than specified.
> > 
> 
> Sure, thanks for the this reminding.
> > > + *
> > 
> > Remove this empty line.
> 
> Ok.
> > 
> > > + */
> > > +struct virtio_net_ctrl_rfs {
> > 
> > /* Number of each of transmit and receive queues to use;
> >  * Legal values are between 1 and max_virtqueue_pairs
> >  */
> > 
> > > +	u16 virtqueue_pairs;
> > > +};
> > > +
> > > +#define VIRTIO_NET_CTRL_RFS   4
> > > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET        0
> > 
> > /* Value range for max_virtqueue_pairsfor and virtqueue_pairs above */
> > 
> > > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN        1
> > > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX        0x8000
> > > +
> > > 
> > >  #endif /* _LINUX_VIRTIO_NET_H */
> > 
> > --
> > To unsubscribe from this list: send the line "unsubscribe kvm" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: net, batman: lockdep circular dependency warning
From: Sven Eckelmann @ 2012-12-04 14:51 UTC (permalink / raw)
  To: b.a.t.m.a.n, netdev; +Cc: Sasha Levin
In-Reply-To: <50A15E1B.1010001@oracle.com>

[-- Attachment #1: Type: text/plain, Size: 4997 bytes --]

Hi,

thanks for your report. It seems nobody else wanted to give an answer... so I 
try to give a small overview.

On Monday 12 November 2012 15:37:47 Sasha Levin wrote:
> Hi all,
> 
> While fuzzing with trinity inside a KVM tools (lkvm) guest running latest
> -next kernel, I've stumbled on the following:
> 
> [ 1002.969392] ======================================================
> [ 1002.971639] [ INFO: possible circular locking dependency detected ]
> [ 1002.975805] 3.7.0-rc5-next-20121112-sasha-00018-g2f4ce0e #127 Tainted: G 
>       W [ 1002.983691]
> ------------------------------------------------------- [ 1002.983691]
> trinity-child18/8149 is trying to acquire lock:
> [ 1002.983691]  (s_active#313){++++.+}, at: [<ffffffff812f9941>]
> sysfs_addrm_finish+0x31/0x60 [ 1002.983691]
> [ 1002.983691] but task is already holding lock:
> [ 1002.983691]  (rtnl_mutex){+.+.+.}, at: [<ffffffff834fcc62>]
> rtnl_lock+0x12/0x20 [ 1002.983691]
> [ 1002.983691] which lock already depends on the new lock.

It is known that batman-adv has a problem with the attaching/detaching of 
interfaces over this sysfs. The cause of this problem is related to the fact 
that batman-adv not only creates its own net_devices, but also unregisters 
net_devices. This unregister will add a new element in the net_todo_list. This 
will cause a rtnl_lock when it calls netdev_wait_allrefs (there are some 
condition, but we just ignore them for now). So the whole exercise of using 
rtnl_trylock was useless.

This extra rtnl_lock can cause a deadlock as you found out because it is 
activated through a sysfs file and therefore the s_active mutex is locked (we 
have the dependency s_active -> rtnl_mutex, but other users have rtnl_mutex -> 
s_active).

So, what to do? There are different possibilities. We have to keep in mind 
that there is a patchset (not yet accepted by the batman-adv maintainers) 
which allows to use `ip link` or compatible tools to create/destroy batman-adv 
devices and attach/detach other devices.

1. Remove the sysfs interface to attach/detach net_devices (which
   destroys/creates batman-adv devices)

   This is not really backward compatible and therefore not really acceptable.
   Marek Lindner and Simon Wunderlich are also against forcing users to
   require special tools to add/configure batman-adv devices (even batctl, ip
   and so on).

2. Ignore the possible deadlock

   (sry, fill in your own comment...)

3. Add workarounds in the core net code

   Simon Wunderlich already tried it... I personally think it is not the right
   way because it more likely to introduce more bugs by hiding a batman-adv
   bug. And these bugs are a lot harder to find... trust me

   For example the usage of __rtnl_unlock will let this bug to appear in
   other places which use rtnl_trylock. This is caused by the fact that the
   todo item isn't processed by __rtnl_unlock (this is the whole idea by
   calling it) and therefore the todo work stays in net_todo_list. Another
   user of rtnl_trylock will now call rtnl_unlock and don't expect an entry in
   net_todo_list because he never unregistered a device. So he now has the
   problem of batman-adv (what an unsocial läderlappen).

   And moving everybody using rtnl_trylock to __rtnl_unlock has still the
   problem that batman-adv don't immediatelly work on its todo and so
   maybe causes other side effects because... the notifications weren't
   sent and therefore the refcount of the unregistered device didn't went
   to zero.

   (I'll leave other side effects as homework for the reader)

4. Don't automatically remove batman-adv devices

   The current approach is to automatically unregister batman-adv devices
   when they don't have attached slave-devices (hardif called by batman-adv).
   Removing this will slightly change the behaviour, but the interface can
   still be removed using `ip link del dev bat0` or a similar tool.

5. Add a workaround solution and promote the use of the standard interface

   So, the basic problem is the s_active mutex locked by the sysfs interface.
   An idea is to postpone the part which needs the rtnl_mutex to a later time.
   This has obviously the problem that we cannot return an error code to the
   caller when the device creation failed in the postponed part. This problem
   can reduced slightly be moving only the unregister part, but now I'll leave
   this out for simplicity of the description.

   A possible implementation would create a work_struct and add it to
   batadv_event_workqueue. This work item has to contain all information given
   by the user (which hardif, name of the batman-adv device).

   Simon Wunderlich already disliked this workaround, but Antonio Quartulli
   tried to encourage an RFC implementation. I've prefered a textual
   description rather than a patch missing explanations of the other
   alternatives.

Kind regards,
	Sven

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply

* Re: [net-next PATCH V3-evictor] net: frag evictor, avoid killing warm frag queues
From: Eric Dumazet @ 2012-12-04 14:47 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: David S. Miller, Florian Westphal, netdev, Thomas Graf,
	Paul E. McKenney, Cong Wang, Herbert Xu
In-Reply-To: <20121204133007.20215.52566.stgit@dragon>

On Tue, 2012-12-04 at 14:30 +0100, Jesper Dangaard Brouer wrote:
> The fragmentation evictor system have a very unfortunate eviction
> system for killing fragment, when the system is put under pressure.
> 
> If packets are coming in too fast, the evictor code kills "warm"
> fragments too quickly.  Resulting in close to zero throughput, as
> fragments are killed before they have a chance to complete
> 
> This is related to the bad interaction with the LRU (Least Recently
> Used) list.  Under load the LRU list sort-of changes meaning/behavior.
> When the LRU head is very new/warm, then the head is most likely the
> one with most fragments and the tail (latest used or added element)
> with least.
> 
> Solved by, introducing a creation "jiffie" timestamp (creation_ts).
> If the element is tried evicted in same jiffie, then perform tail drop
> on the LRU list instead.
> 
> Signed-off-by: Jesper Dangaard Brouer <jbrouer@redhat.com>

This would only 'work' if a reassembled packet can be done/completed
under one jiffie.

For 64KB packets, this means 100Mb link wont be able to deliver a
reassembled packet under IP frags load if HZ=1000

LRU goal is to be able to select the oldest inet_frag_queue, because in
typical networks, packet losses are really happening and this is why
some packets wont complete their reassembly. They naturally will be
found on LRU head, and they probably are very fat (for example a single
packet was lost for the inet_frag_queue)

Choosing the most recent inet_frag_queue is exactly the opposite
strategy. We pay the huge cost of maintaining a central LRU, and we
exactly misuse it.

As long as an inet_frag_queue receives new fragments and is moved to the
LRU tail, its a candidate for being kept, not a candidate for being
evicted.

Only when an inet_frag_queue is the oldest one, it becomes a candidate
for eviction.

I think you are trying to solve a configuration/tuning problem by
changing a valid strategy.

Whats wrong with admitting high_thresh/low_thresh default values should
be updated, now some people apparently want to use IP fragments in
production ?

Lets say we allow to use 1 % of memory for frags, instead of the current
256 KB limit, which was chosen decades ago.

Only in very severe DOS attacks, LRU head 'creation_ts' would possibly
be <= 1ms. And under severe DOS attacks, I am afraid there is nothing we
can do.

(We could eventually avoid LRU hassle and chose instead a random drop
strategy)

high_thresh/low_thresh should be changed from 'int' to 'long' as well,
so that a 64bit host could use more than 2GB for frag storage.

^ permalink raw reply

* Re: [PATCH net-next 3/3] virtio-net: change the number of queues through ethtool
From: Jason Wang @ 2012-12-04 14:46 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, davem, shiyer
In-Reply-To: <20121204134959.GG7499@redhat.com>

On Tuesday, December 04, 2012 03:49:59 PM Michael S. Tsirkin wrote:
> On Tue, Dec 04, 2012 at 07:07:58PM +0800, Jason Wang wrote:
> > This patch implement the ethtool_{set|get}_channels method of ethool to
> > allow user to change the number of queues dymaically when the device is
> > running. This would let the user to configure it on demand.
> > 
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > 
> >  drivers/net/virtio_net.c |   44
> >  ++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 44
> >  insertions(+), 0 deletions(-)
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 912f5b2..b9f9887 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -1589,10 +1589,54 @@ static struct virtio_driver virtio_net_driver = {
> > 
> >  #endif
> >  };
> > 
> > +/* TODO: Eliminate OOO packets during switching */
> > +static int virtnet_set_channels(struct net_device *dev,
> > +				struct ethtool_channels *channels)
> > +{
> > +	struct virtnet_info *vi = netdev_priv(dev);
> > +	u16 queue_pairs = channels->combined_count;
> > +	u16 old_queue_pairs = vi->curr_queue_pairs;
> > +
> > +	/* We don't support separate rx/tx channels.
> > +	 * We don't allow setting 'other' channels.
> > +	 */
> > +	if (channels->rx_count || channels->tx_count || channels->other_count)
> > +		return -EINVAL;
> > +
> > +	if (queue_pairs > vi->max_queue_pairs)
> > +		return -EINVAL;
> > +
> > +	vi->curr_queue_pairs = queue_pairs;
> > +	if (virtnet_set_queues(vi) == 0) {
> > +		netif_set_real_num_tx_queues(dev, vi->curr_queue_pairs);
> > +		netif_set_real_num_rx_queues(dev, vi->curr_queue_pairs);
> 
> Just use queue_pairs - it's shorter.

Ok.
> 
> > +
> > +		virtnet_set_affinity(vi, true);
> > +	} else
> > +		vi->curr_queue_pairs = old_queue_pairs;
> 
> Should be
> 	ret = virtnet_set_queues(vi);
> 	if (ret) {
> 		vi->curr_queue_pairs = old_queue_pairs;
> 		return ret;
> 	}
> otherwise we loose error reporting.
> 

Right.
> Also it's better if virtnet_set_queues
> gets queue_pairs as parameter and set curr_queue_pairs
> on success.

True, looks simpler than current method, will do it in next version.
> 
> > +
> > +	return 0;
> > +}
> > +
> > +static void virtnet_get_channels(struct net_device *dev,
> > +				 struct ethtool_channels *channels)
> > +{
> > +	struct virtnet_info *vi = netdev_priv(dev);
> > +
> > +	channels->combined_count = vi->curr_queue_pairs;
> > +	channels->max_combined = vi->max_queue_pairs;
> > +	channels->max_other = 0;
> > +	channels->rx_count = 0;
> > +	channels->tx_count = 0;
> > +	channels->other_count = 0;
> > +}
> > +
> > 
> >  static const struct ethtool_ops virtnet_ethtool_ops = {
> >  
> >  	.get_drvinfo = virtnet_get_drvinfo,
> >  	.get_link = ethtool_op_get_link,
> >  	.get_ringparam = virtnet_get_ringparam,
> > 
> > +	.set_channels = virtnet_set_channels,
> > +	.get_channels = virtnet_get_channels,
> > 
> >  };
> >  
> >  static int __init init(void)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH net-next 2/3] virtio_net: multiqueue support
From: Jason Wang @ 2012-12-04 14:45 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: krkumar2, kvm, netdev, linux-kernel, virtualization, bhutchings,
	jwhan, davem, shiyer
In-Reply-To: <20121204132422.GD7499@redhat.com>

On Tuesday, December 04, 2012 03:24:22 PM Michael S. Tsirkin wrote:
> I found some bugs, see below.
> Also some style nitpicking, this is not mandatory to address.

Thanks for the reviewing.
> 
> On Tue, Dec 04, 2012 at 07:07:57PM +0800, Jason Wang wrote:
> > This addes multiqueue support to virtio_net driver. In multiple queue
> > modes, the driver expects the number of queue paris is equal to the
> > number of vcpus. To eliminate the contention bettwen vcpus and
> > virtqueues, per-cpu virtqueue pairs
> > were implemented through:
> Lots of typos above - try running ispell on it :)
> 

Sure.
> > - select the txq based on the smp processor id.
> > - smp affinity hint were set to the vcpu that owns the queue pairs.
> > 
> > Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
> > Signed-off-by: Jason Wang <jasowang@redhat.com>
> > ---
> > 
> >  drivers/net/virtio_net.c        |  472
> >  ++++++++++++++++++++++++++++++--------- include/uapi/linux/virtio_net.h
> >  |   16 ++
> >  2 files changed, 385 insertions(+), 103 deletions(-)
> > 
> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> > index 266f712..912f5b2 100644
> > --- a/drivers/net/virtio_net.c
> > +++ b/drivers/net/virtio_net.c
> > @@ -81,16 +81,25 @@ struct virtnet_info {
> > 
> >  	struct virtio_device *vdev;
> >  	struct virtqueue *cvq;
> >  	struct net_device *dev;
> > 
> > -	struct send_queue sq;
> > -	struct receive_queue rq;
> > +	struct send_queue *sq;
> > +	struct receive_queue *rq;
> > 
> >  	unsigned int status;
> > 
> > +	/* Max # of queue pairs supported by the device */
> > +	u16 max_queue_pairs;
> > +
> > +	/* # of queue pairs currently used by the driver */
> > +	u16 curr_queue_pairs;
> > +
> > 
> >  	/* I like... big packets and I cannot lie! */
> >  	bool big_packets;
> >  	
> >  	/* Host will merge rx buffers for big packets (shake it! shake it!) */
> >  	bool mergeable_rx_bufs;
> > 
> > +	/* Has control virtqueue */
> > +	bool has_cvq;
> > +
> > 
> >  	/* enable config space updates */
> >  	bool config_enable;
> > 
> > @@ -125,6 +134,32 @@ struct padded_vnet_hdr {
> > 
> >  	char padding[6];
> >  
> >  };
> > 
> > +static const struct ethtool_ops virtnet_ethtool_ops;
> > +
> > +
> > +/* Converting between virtqueue no. and kernel tx/rx queue no.
> > + * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq
> > + */
> > +static int vq2txq(struct virtqueue *vq)
> > +{
> > +	return (virtqueue_get_queue_index(vq) - 1) / 2;
> > +}
> > +
> > +static int txq2vq(int txq)
> > +{
> > +	return txq * 2 + 1;
> > +}
> > +
> > +static int vq2rxq(struct virtqueue *vq)
> > +{
> > +	return virtqueue_get_queue_index(vq) / 2;
> > +}
> > +
> > +static int rxq2vq(int rxq)
> > +{
> > +	return rxq * 2;
> > +}
> > +
> > 
> >  static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb)
> >  {
> >  
> >  	return (struct skb_vnet_hdr *)skb->cb;
> > 
> > @@ -165,7 +200,7 @@ static void skb_xmit_done(struct virtqueue *vq)
> > 
> >  	virtqueue_disable_cb(vq);
> >  	
> >  	/* We were probably waiting for more output buffers. */
> > 
> > -	netif_wake_queue(vi->dev);
> > +	netif_wake_subqueue(vi->dev, vq2txq(vq));
> > 
> >  }
> >  
> >  static void set_skb_frag(struct sk_buff *skb, struct page *page,
> > 
> > @@ -502,7 +537,7 @@ static bool try_fill_recv(struct receive_queue *rq,
> > gfp_t gfp)> 
> >  static void skb_recv_done(struct virtqueue *rvq)
> >  {
> >  
> >  	struct virtnet_info *vi = rvq->vdev->priv;
> > 
> > -	struct receive_queue *rq = &vi->rq;
> > +	struct receive_queue *rq = &vi->rq[vq2rxq(rvq)];
> > 
> >  	/* Schedule NAPI, Suppress further interrupts if successful. */
> >  	if (napi_schedule_prep(&rq->napi)) {
> > 
> > @@ -532,15 +567,21 @@ static void refill_work(struct work_struct *work)
> > 
> >  	struct virtnet_info *vi =
> >  	
> >  		container_of(work, struct virtnet_info, refill.work);
> >  	
> >  	bool still_empty;
> > 
> > +	int i;
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		struct receive_queue *rq = &vi->rq[i];
> > 
> > -	napi_disable(&vi->rq.napi);
> > -	still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL);
> > -	virtnet_napi_enable(&vi->rq);
> > +		napi_disable(&rq->napi);
> > +		still_empty = !try_fill_recv(rq, GFP_KERNEL);
> > +		virtnet_napi_enable(rq);
> > 
> > -	/* In theory, this can happen: if we don't get any buffers in
> > -	 * we will *never* try to fill again. */
> > -	if (still_empty)
> > -		schedule_delayed_work(&vi->refill, HZ/2);
> > +		/* In theory, this can happen: if we don't get any buffers in
> > +		 * we will *never* try to fill again.
> > +		 */
> > +		if (still_empty)
> > +			schedule_delayed_work(&vi->refill, HZ/2);
> > +	}
> > 
> >  }
> >  
> >  static int virtnet_poll(struct napi_struct *napi, int budget)
> > 
> > @@ -650,7 +691,8 @@ static int xmit_skb(struct send_queue *sq, struct
> > sk_buff *skb)> 
> >  static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device
> >  *dev)
> >  {
> >  
> >  	struct virtnet_info *vi = netdev_priv(dev);
> > 
> > -	struct send_queue *sq = &vi->sq;
> > +	int qnum = skb_get_queue_mapping(skb);
> > +	struct send_queue *sq = &vi->sq[qnum];
> > 
> >  	int capacity;
> >  	
> >  	/* Free up any pending old buffers before queueing new ones. */
> > 
> > @@ -664,13 +706,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb,
> > struct net_device *dev)> 
> >  		if (likely(capacity == -ENOMEM)) {
> >  		
> >  			if (net_ratelimit())
> >  			
> >  				dev_warn(&dev->dev,
> > 
> > -					 "TX queue failure: out of memory\n");
> > +					 "TXQ (%d) failure: out of memory\n",
> > +					 qnum);
> > 
> >  		} else {
> >  		
> >  			dev->stats.tx_fifo_errors++;
> >  			if (net_ratelimit())
> >  			
> >  				dev_warn(&dev->dev,
> > 
> > -					 "Unexpected TX queue failure: %d\n",
> > -					 capacity);
> > +					 "Unexpected TXQ (%d) failure: %d\n",
> > +					 qnum, capacity);
> > 
> >  		}
> >  		dev->stats.tx_dropped++;
> >  		kfree_skb(skb);
> > 
> > @@ -685,12 +728,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb,
> > struct net_device *dev)> 
> >  	/* Apparently nice girls don't return TX_BUSY; stop the queue
> >  	
> >  	 * before it gets out of hand.  Naturally, this wastes entries. */
> >  	
> >  	if (capacity < 2+MAX_SKB_FRAGS) {
> > 
> > -		netif_stop_queue(dev);
> > +		netif_stop_subqueue(dev, qnum);
> > 
> >  		if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
> >  		
> >  			/* More just got used, free them then recheck. */
> >  			capacity += free_old_xmit_skbs(sq);
> >  			if (capacity >= 2+MAX_SKB_FRAGS) {
> > 
> > -				netif_start_queue(dev);
> > +				netif_start_subqueue(dev, qnum);
> > 
> >  				virtqueue_disable_cb(sq->vq);
> >  			
> >  			}
> >  		
> >  		}
> > 
> > @@ -758,23 +801,13 @@ static struct rtnl_link_stats64
> > *virtnet_stats(struct net_device *dev,> 
> >  static void virtnet_netpoll(struct net_device *dev)
> >  {
> >  
> >  	struct virtnet_info *vi = netdev_priv(dev);
> > 
> > +	int i;
> > 
> > -	napi_schedule(&vi->rq.napi);
> > +	for (i = 0; i < vi->curr_queue_pairs; i++)
> > +		napi_schedule(&vi->rq[i].napi);
> > 
> >  }
> >  #endif
> > 
> > -static int virtnet_open(struct net_device *dev)
> > -{
> > -	struct virtnet_info *vi = netdev_priv(dev);
> > -
> > -	/* Make sure we have some buffers: if oom use wq. */
> > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> > -		schedule_delayed_work(&vi->refill, 0);
> > -
> > -	virtnet_napi_enable(&vi->rq);
> > -	return 0;
> > -}
> > -
> > 
> >  /*
> >  
> >   * Send command via the control virtqueue and check status.  Commands
> >   * supported by the hypervisor, as indicated by feature bits, should
> > 
> > @@ -830,13 +863,51 @@ static void virtnet_ack_link_announce(struct
> > virtnet_info *vi)> 
> >  	rtnl_unlock();
> >  
> >  }
> > 
> > +/* Caller check the support of cvq and multiqueue. */
> > +static int virtnet_set_queues(struct virtnet_info *vi)
> > +{
> > +	struct scatterlist sg;
> > +	struct virtio_net_ctrl_rfs s;
> > +	struct net_device *dev = vi->dev;
> > +
> > +	s.virtqueue_pairs = vi->curr_queue_pairs;
> > +	sg_init_one(&sg, &s, sizeof(s));
> > +
> > +	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RFS,
> > +				  VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET, &sg, 1, 0)){
> > +		dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
> > +			 vi->curr_queue_pairs);
> > +		return -EINVAL;
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > +static int virtnet_open(struct net_device *dev)
> 
> Why move this here? diff will be smaller if you don't.

Ok, will move it back.
> 
> > +{
> > +	struct virtnet_info *vi = netdev_priv(dev);
> > +	int i;
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		/* Make sure we have some buffers: if oom use wq. */
> > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> > +			schedule_delayed_work(&vi->refill, 0);
> > +		virtnet_napi_enable(&vi->rq[i]);
> > +	}
> > +
> > +	return 0;
> > +}
> > +
> > 
> >  static int virtnet_close(struct net_device *dev)
> >  {
> >  
> >  	struct virtnet_info *vi = netdev_priv(dev);
> > 
> > +	int i;
> > 
> >  	/* Make sure refill_work doesn't re-enable napi! */
> >  	cancel_delayed_work_sync(&vi->refill);
> > 
> > -	napi_disable(&vi->rq.napi);
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++)
> > +		napi_disable(&vi->rq[i].napi);
> > 
> >  	return 0;
> >  
> >  }
> > 
> > @@ -948,8 +1019,8 @@ static void virtnet_get_ringparam(struct net_device
> > *dev,> 
> >  {
> >  
> >  	struct virtnet_info *vi = netdev_priv(dev);
> > 
> > -	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq);
> > -	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq);
> > +	ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq);
> > +	ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq);
> > 
> >  	ring->rx_pending = ring->rx_max_pending;
> >  	ring->tx_pending = ring->tx_max_pending;
> >  
> >  }
> > 
> > @@ -967,12 +1038,6 @@ static void virtnet_get_drvinfo(struct net_device
> > *dev,> 
> >  }
> > 
> > -static const struct ethtool_ops virtnet_ethtool_ops = {
> > -	.get_drvinfo = virtnet_get_drvinfo,
> > -	.get_link = ethtool_op_get_link,
> > -	.get_ringparam = virtnet_get_ringparam,
> > -};
> > -
> > 
> >  #define MIN_MTU 68
> >  #define MAX_MTU 65535
> > 
> > @@ -984,6 +1049,21 @@ static int virtnet_change_mtu(struct net_device
> > *dev, int new_mtu)> 
> >  	return 0;
> >  
> >  }
> > 
> > +/* To avoid contending a lock hold by a vcpu who would exit to host,
> > select the + * txq based on the processor id.
> > + * TODO: handle cpu hotplug.
> > + */
> > +static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff
> > *skb) +{
> > +	int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) :
> > +		  smp_processor_id();
> > +
> > +	while (unlikely(txq >= dev->real_num_tx_queues))
> > +		txq -= dev->real_num_tx_queues;
> > +
> > +	return txq;
> > +}
> > +
> > 
> >  static const struct net_device_ops virtnet_netdev = {
> >  
> >  	.ndo_open            = virtnet_open,
> >  	.ndo_stop   	     = virtnet_close,
> > 
> > @@ -995,6 +1075,7 @@ static const struct net_device_ops virtnet_netdev = {
> > 
> >  	.ndo_get_stats64     = virtnet_stats,
> >  	.ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid,
> >  	.ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid,
> > 
> > +	.ndo_select_queue     = virtnet_select_queue,
> > 
> >  #ifdef CONFIG_NET_POLL_CONTROLLER
> >  
> >  	.ndo_poll_controller = virtnet_netpoll,
> >  
> >  #endif
> > 
> > @@ -1030,10 +1111,10 @@ static void virtnet_config_changed_work(struct
> > work_struct *work)> 
> >  	if (vi->status & VIRTIO_NET_S_LINK_UP) {
> >  	
> >  		netif_carrier_on(vi->dev);
> > 
> > -		netif_wake_queue(vi->dev);
> > +		netif_tx_wake_all_queues(vi->dev);
> > 
> >  	} else {
> >  	
> >  		netif_carrier_off(vi->dev);
> > 
> > -		netif_stop_queue(vi->dev);
> > +		netif_tx_stop_all_queues(vi->dev);
> > 
> >  	}
> >  
> >  done:
> >  	mutex_unlock(&vi->config_lock);
> > 
> > @@ -1046,41 +1127,219 @@ static void virtnet_config_changed(struct
> > virtio_device *vdev)> 
> >  	schedule_work(&vi->config_work);
> >  
> >  }
> > 
> > -static int init_vqs(struct virtnet_info *vi)
> > +static void free_receive_bufs(struct virtnet_info *vi)
> > 
> >  {
> > 
> > -	struct virtqueue *vqs[3];
> > -	vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL};
> > -	const char *names[] = { "input", "output", "control" };
> > -	int nvqs, err;
> > +	int i;
> > 
> > -	/* We expect two virtqueues, receive then send,
> > -	 * and optionally control. */
> > -	nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		while (vi->rq[i].pages)
> > +			__free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0);
> > +	}
> > +}
> > 
> > -	err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names);
> > -	if (err)
> > -		return err;
> > +/* Free memory allocated for send and receive queues */
> > +static void virtnet_free_queues(struct virtnet_info *vi)
> 
> I think it's cleaner to open-code this, this way
> during error handling you can have:
> 
> 	kfree(vi->rq);
> err_rq:
> 	kfree(vi->sq);
> err_sq:
> 
> without tricks like malloc them both then goto end.

Ok.
> 
> > +{
> > +	kfree(vi->rq);
> > +	vi->rq = NULL;
> > +	kfree(vi->sq);
> > +	vi->sq = NULL;
> 
> I think = NULL is not needed - we never call this twice.

Sure, will remove it.
> 
> > +}
> > +
> > +static void free_unused_bufs(struct virtnet_info *vi)
> > +{
> > +	void *buf;
> > +	int i;
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		struct virtqueue *vq = vi->sq[i].vq;
> > +		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
> > +			dev_kfree_skb(buf);
> > +	}
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		struct virtqueue *vq = vi->rq[i].vq;
> > +
> > +		while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) {
> > +			if (vi->mergeable_rx_bufs || vi->big_packets)
> > +				give_pages(&vi->rq[i], buf);
> > +			else
> > +				dev_kfree_skb(buf);
> > +			--vi->rq[i].num;
> > +		}
> > +		BUG_ON(vi->rq[i].num != 0);
> > +	}
> > +}
> > +
> > +static void virtnet_set_affinity(struct virtnet_info *vi, bool set)
> > +{
> > +	int i;
> > +
> > +	/* Don't set the affinity hint when in single queue mode or we have too
> > +	 * much online cpus.
> > +	 */
> 
> Pls remove this comment, or replace with one explaining
> the motivation for this logic.

Will replace with the motivation.
> 
> > +	if (vi->curr_queue_pairs == 1 ||
> > +	    vi->max_queue_pairs > num_online_cpus())
> 
> If we have less it's not a good idea either, is it?
> So check vi->max_queue_pairs != num_online_cpus().

Ok.
> 
> > +		set = false;
> 
> This will overwrite affinity if it was set by userspace.
> Just
> 	if (set)
> 		return;
> will not have this problem.

But we need handle the situtaiton when switch back to sq from mq mode. 
Otherwise we may still get the affinity hint used in mq.  This kind of overwrite 
is unavoidable or is there some method to detect whether userspac write 
something new?
> 
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		int cpu = set ? i : -1;
> > +		virtqueue_set_affinity(vi->rq[i].vq, cpu);
> > +		virtqueue_set_affinity(vi->sq[i].vq, cpu);
> > +	}
> > +}
> > +
> > +static void virtnet_del_vqs(struct virtnet_info *vi)
> 
> It might be a good idea to add this function in previous
> patch.

Yes, good suggetion.
> 
> > +{
> > +	struct virtio_device *vdev = vi->vdev;
> > +
> > +	virtnet_set_affinity(vi, false);
> > +
> > +	vdev->config->del_vqs(vdev);
> > 
> > -	vi->rq.vq = vqs[0];
> > -	vi->sq.vq = vqs[1];
> > +	virtnet_free_queues(vi);
> > +}
> > 
> > -	if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) {
> > -		vi->cvq = vqs[2];
> > +static int virtnet_find_vqs(struct virtnet_info *vi)
> > +{
> > +	vq_callback_t **callbacks;
> > +	struct virtqueue **vqs;
> > +	int ret = -ENOMEM;
> > +	int i, total_vqs;
> > +	char **names;
> > +
> > +	/* We expect 1 RX virtqueue followed by 1 TX virtqueue, followd by
> 
> followed
> 

Ok, will correct it.
> > +	 * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by
> > +	 * possible control vq.
> > +	 */
> > +	total_vqs = vi->max_queue_pairs * 2 +
> > +		    virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ);
> > +
> > +	/* Allocate space for find_vqs parameters */
> > +	vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL);
> > +	callbacks = kzalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL);
> > +	if (!vqs || !callbacks)
> > +		goto err_mem;
> > +	names = kzalloc(total_vqs * sizeof(*names), GFP_KERNEL);
> 
> Why kzalloc? You seem to fill in all of it. Pls just use kmalloc
> and initialize fields.
> 

Right.
> > +	if (!names)
> > +		goto err_mem;
> 
> Since you have separate goto here it's more consistent
> to use a separate label and two if tests above.
> 

Sure.
> > +
> > +	/* Parameters for control virtqueue, if any */
> > +	if (vi->has_cvq) {
> > +		callbacks[total_vqs - 1] = NULL;
> > +		names[total_vqs - 1] = kasprintf(GFP_KERNEL, "control");
> > +	}
> > 
> > +	/* Allocate/initialize parameters for send/receive virtqueues */
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		callbacks[rxq2vq(i)] = skb_recv_done;
> > +		callbacks[txq2vq(i)] = skb_xmit_done;
> > +		names[rxq2vq(i)] = kasprintf(GFP_KERNEL, "input.%d", i);
> > +		names[txq2vq(i)] = kasprintf(GFP_KERNEL, "output.%d", i);
> > +	}
> 
> We would need to check kasprintf return value.

Looks like a better method is to make the name as a memeber of receive_queue 
and send_queue, and use sprintf here.
> Also if you allocate names from slab we'll need to free them
> later.

Then it could be freed when the send_queue and receive_queue is freed.
> It's probably easier to just use fixed names for now -
> it's not like the index is really useful.

Looks useful for debugging e.g. check whether the irq distribution is as 
expected.
> 
> > +
> > +	ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks,
> > +					 (const char **)names);
> 
> Please avoid casts, use a proper type for names.

I'm consider we need a minor change in this api, we need allocate the names 
dynamically which could not be a const char **.
> 
> > +	if (ret)
> > +		goto err_names;
> > +
> > +	if (vi->has_cvq) {
> > +		vi->cvq = vqs[total_vqs - 1];
> > 
> >  		if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN))
> >  		
> >  			vi->dev->features |= NETIF_F_HW_VLAN_FILTER;
> >  	
> >  	}
> > 
> > +
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		vi->rq[i].vq = vqs[rxq2vq(i)];
> > +		vi->sq[i].vq = vqs[txq2vq(i)];
> > +	}
> > +
> > +	kfree(callbacks);
> > +	kfree(vqs);
> 
> Who frees names if there's no error?
> 

The virtio core does not copy the name, so it need this and only used for 
debugging if I'm reading the code correctly.
> > +
> > +	return 0;
> > +
> > +err_names:
> > +	for (i = 0; i < total_vqs * 2; i++)
> 
> Why * 2?
> This looks like a bug.

Yes, it's a bug.
> 
> > +		kfree(names[i]);
> > +	kfree(names);
> > +
> > +err_mem:
> > +	kfree(callbacks);
> > +	kfree(vqs);
> > +
> > +	return ret;
> > +}
> > +
> > +static int virtnet_alloc_queues(struct virtnet_info *vi)
> > +{
> > +	int i;
> > +
> > +	vi->sq = kzalloc(sizeof(vi->sq[0]) * vi->max_queue_pairs, GFP_KERNEL);
> > +	vi->rq = kzalloc(sizeof(vi->rq[0]) * vi->max_queue_pairs, GFP_KERNEL);
> 
> While equivalent, *vi->rq is clearer IMHO.

Ok.
> 
> > +	if (!vi->rq || !vi->sq)
> > +		goto err;
> > +
> > +	INIT_DELAYED_WORK(&vi->refill, refill_work);
> > +	/* setup initial receive and send queue parameters */
> 
> Pls remove this comment, it's confuses more than it clarifies.

Sure.
> 
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		vi->rq[i].pages = NULL;
> > +		netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll,
> > +			       napi_weight);
> > +
> > +		sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg));
> > +		sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg));
> > +	}
> > +
> > +
> 
> Extra empty line.
> 

Will remove this line.
> > +	return 0;
> > +
> > +err:
> > +	virtnet_free_queues(vi);
> > +	return -ENOMEM;
> > +}
> > +
> > +static int init_vqs(struct virtnet_info *vi)
> > +{
> > +	int ret;
> > +
> > +	/* Allocate send & receive queues */
> > +	ret = virtnet_alloc_queues(vi);
> > +	if (ret)
> > +		goto err;
> > +
> > +	ret = virtnet_find_vqs(vi);
> > +	if (ret)
> > +		goto err_free;
> > +
> > +	virtnet_set_affinity(vi, true);
> > 
> >  	return 0;
> > 
> > +
> > +err_free:
> > +	virtnet_free_queues(vi);
> > +err:
> > +	return ret;
> > 
> >  }
> >  
> >  static int virtnet_probe(struct virtio_device *vdev)
> >  {
> > 
> > -	int err;
> > +	int i, err;
> > 
> >  	struct net_device *dev;
> >  	struct virtnet_info *vi;
> > 
> > +	u16 max_queue_pairs;
> > +
> > +	/* Find if host supports multiqueue virtio_net device */
> > +	err = virtio_config_val(vdev, VIRTIO_NET_F_RFS,
> > +				offsetof(struct virtio_net_config,
> > +				max_virtqueue_pairs), &max_queue_pairs);
> > +
> > +	/* We need at least 2 queue's */
> > +	if (err || max_queue_pairs < VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN ||
> > +	    max_queue_pairs > VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX)
> 
> Check has_cvq as well.
> 

Sure.
> > +		max_queue_pairs = 1;
> > 
> >  	/* Allocate ourselves a network device with room for our info */
> > 
> > -	dev = alloc_etherdev(sizeof(struct virtnet_info));
> > +	dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs);
> > 
> >  	if (!dev)
> >  	
> >  		return -ENOMEM;
> > 
> > @@ -1127,22 +1386,17 @@ static int virtnet_probe(struct virtio_device
> > *vdev)> 
> >  	/* Set up our device-specific information */
> >  	vi = netdev_priv(dev);
> > 
> > -	netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight);
> > 
> >  	vi->dev = dev;
> >  	vi->vdev = vdev;
> >  	vdev->priv = vi;
> > 
> > -	vi->rq.pages = NULL;
> > 
> >  	vi->stats = alloc_percpu(struct virtnet_stats);
> >  	err = -ENOMEM;
> >  	if (vi->stats == NULL)
> >  	
> >  		goto free;
> > 
> > -	INIT_DELAYED_WORK(&vi->refill, refill_work);
> > 
> >  	mutex_init(&vi->config_lock);
> >  	vi->config_enable = true;
> >  	INIT_WORK(&vi->config_work, virtnet_config_changed_work);
> > 
> > -	sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg));
> > -	sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg));
> > 
> >  	/* If we can receive ANY GSO packets, we must allocate large ones. */
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
> > 
> > @@ -1153,10 +1407,21 @@ static int virtnet_probe(struct virtio_device
> > *vdev)> 
> >  	if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF))
> >  	
> >  		vi->mergeable_rx_bufs = true;
> > 
> > +	if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ))
> > +		vi->has_cvq = true;
> > +
> > +	/* Use single tx/rx queue pair as default */
> > +	vi->curr_queue_pairs = 1;
> > +	vi->max_queue_pairs = max_queue_pairs;
> > +
> > +	/* Allocate/initialize the rx/tx queues, and invoke find_vqs */
> > 
> >  	err = init_vqs(vi);
> >  	if (err)
> >  	
> >  		goto free_stats;
> > 
> > +	netif_set_real_num_tx_queues(dev, 1);
> > +	netif_set_real_num_rx_queues(dev, 1);
> > +
> > 
> >  	err = register_netdev(dev);
> >  	if (err) {
> >  	
> >  		pr_debug("virtio_net: registering device failed\n");
> > 
> > @@ -1164,12 +1429,15 @@ static int virtnet_probe(struct virtio_device
> > *vdev)> 
> >  	}
> >  	
> >  	/* Last of all, set up some receive buffers. */
> > 
> > -	try_fill_recv(&vi->rq, GFP_KERNEL);
> > -
> > -	/* If we didn't even get one input buffer, we're useless. */
> > -	if (vi->rq.num == 0) {
> > -		err = -ENOMEM;
> > -		goto unregister;
> > +	for (i = 0; i < vi->max_queue_pairs; i++) {
> > +		try_fill_recv(&vi->rq[i], GFP_KERNEL);
> > +
> > +		/* If we didn't even get one input buffer, we're useless. */
> > +		if (vi->rq[i].num == 0) {
> > +			free_unused_bufs(vi);
> > +			err = -ENOMEM;
> > +			goto free_recv_bufs;
> > +		}
> > 
> >  	}
> >  	
> >  	/* Assume link up if device can't report link status,
> > 
> > @@ -1182,13 +1450,19 @@ static int virtnet_probe(struct virtio_device
> > *vdev)> 
> >  		netif_carrier_on(dev);
> >  	
> >  	}
> > 
> > -	pr_debug("virtnet: registered device %s\n", dev->name);
> > +	pr_debug("virtnet: registered device %s with %d RX and TX vq's\n",
> > +		 dev->name, max_queue_pairs);
> > +
> > 
> >  	return 0;
> > 
> > -unregister:
> > +free_recv_bufs:
> > +	free_receive_bufs(vi);
> > 
> >  	unregister_netdev(dev);
> > 
> > +
> > 
> >  free_vqs:
> > -	vdev->config->del_vqs(vdev);
> > +	cancel_delayed_work_sync(&vi->refill);
> > +	virtnet_del_vqs(vi);
> > +
> > 
> >  free_stats:
> >  	free_percpu(vi->stats);
> >  
> >  free:
> > @@ -1196,28 +1470,6 @@ free:
> >  	return err;
> >  
> >  }
> > 
> > -static void free_unused_bufs(struct virtnet_info *vi)
> > -{
> > -	void *buf;
> > -	while (1) {
> > -		buf = virtqueue_detach_unused_buf(vi->sq.vq);
> > -		if (!buf)
> > -			break;
> > -		dev_kfree_skb(buf);
> > -	}
> > -	while (1) {
> > -		buf = virtqueue_detach_unused_buf(vi->rq.vq);
> > -		if (!buf)
> > -			break;
> > -		if (vi->mergeable_rx_bufs || vi->big_packets)
> > -			give_pages(&vi->rq, buf);
> > -		else
> > -			dev_kfree_skb(buf);
> > -		--vi->rq.num;
> > -	}
> > -	BUG_ON(vi->rq.num != 0);
> > -}
> > -
> > 
> >  static void remove_vq_common(struct virtnet_info *vi)
> >  {
> >  
> >  	vi->vdev->config->reset(vi->vdev);
> > 
> > @@ -1225,10 +1477,9 @@ static void remove_vq_common(struct virtnet_info
> > *vi)> 
> >  	/* Free unused buffers in both send and recv, if any. */
> >  	free_unused_bufs(vi);
> > 
> > -	vi->vdev->config->del_vqs(vi->vdev);
> > +	free_receive_bufs(vi);
> > 
> > -	while (vi->rq.pages)
> > -		__free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0);
> > +	virtnet_del_vqs(vi);
> > 
> >  }
> >  
> >  static void __devexit virtnet_remove(struct virtio_device *vdev)
> > 
> > @@ -1254,6 +1505,7 @@ static void __devexit virtnet_remove(struct
> > virtio_device *vdev)> 
> >  static int virtnet_freeze(struct virtio_device *vdev)
> >  {
> >  
> >  	struct virtnet_info *vi = vdev->priv;
> > 
> > +	int i;
> > 
> >  	/* Prevent config work handler from accessing the device */
> >  	mutex_lock(&vi->config_lock);
> > 
> > @@ -1264,7 +1516,10 @@ static int virtnet_freeze(struct virtio_device
> > *vdev)> 
> >  	cancel_delayed_work_sync(&vi->refill);
> >  	
> >  	if (netif_running(vi->dev))
> > 
> > -		napi_disable(&vi->rq.napi);
> > +		for (i = 0; i < vi->max_queue_pairs; i++) {
> > +			napi_disable(&vi->rq[i].napi);
> > +			netif_napi_del(&vi->rq[i].napi);
> > +		}
> > 
> >  	remove_vq_common(vi);
> > 
> > @@ -1276,24 +1531,29 @@ static int virtnet_freeze(struct virtio_device
> > *vdev)> 
> >  static int virtnet_restore(struct virtio_device *vdev)
> >  {
> >  
> >  	struct virtnet_info *vi = vdev->priv;
> > 
> > -	int err;
> > +	int err, i;
> > 
> >  	err = init_vqs(vi);
> >  	if (err)
> >  	
> >  		return err;
> >  	
> >  	if (netif_running(vi->dev))
> > 
> > -		virtnet_napi_enable(&vi->rq);
> > +		for (i = 0; i < vi->max_queue_pairs; i++)
> > +			virtnet_napi_enable(&vi->rq[i]);
> > 
> >  	netif_device_attach(vi->dev);
> > 
> > -	if (!try_fill_recv(&vi->rq, GFP_KERNEL))
> > -		schedule_delayed_work(&vi->refill, 0);
> > +	for (i = 0; i < vi->max_queue_pairs; i++)
> > +		if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
> > +			schedule_delayed_work(&vi->refill, 0);
> > 
> >  	mutex_lock(&vi->config_lock);
> >  	vi->config_enable = true;
> >  	mutex_unlock(&vi->config_lock);
> > 
> > +	if (vi->has_cvq && virtio_has_feature(vi->vdev, VIRTIO_NET_F_RFS))
> > +		virtnet_set_queues(vi);
> > +
> 
> I think it's easier to test
> if (curr_queue_pairs == max_queue_pairs)
> within virtnet_set_queues and make it
> a NOP if so.

Still need to send the command during restore since we reset the device during 
freezing.
> 
> >  	return 0;
> >  
> >  }
> >  #endif
> > 
> > @@ -1311,7 +1571,7 @@ static unsigned int features[] = {
> > 
> >  	VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO,
> >  	VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ,
> >  	VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN,
> > 
> > -	VIRTIO_NET_F_GUEST_ANNOUNCE,
> > +	VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_RFS,
> > 
> >  };
> >  
> >  static struct virtio_driver virtio_net_driver = {
> > 
> > @@ -1329,6 +1589,12 @@ static struct virtio_driver virtio_net_driver = {
> > 
> >  #endif
> >  };
> > 
> > +static const struct ethtool_ops virtnet_ethtool_ops = {
> > +	.get_drvinfo = virtnet_get_drvinfo,
> > +	.get_link = ethtool_op_get_link,
> > +	.get_ringparam = virtnet_get_ringparam,
> > +};
> > +
> > 
> >  static int __init init(void)
> >  {
> >  
> >  	return register_virtio_driver(&virtio_net_driver);
> > 
> > diff --git a/include/uapi/linux/virtio_net.h
> > b/include/uapi/linux/virtio_net.h index 2470f54..6056cec 100644
> > --- a/include/uapi/linux/virtio_net.h
> > +++ b/include/uapi/linux/virtio_net.h
> > @@ -51,6 +51,7 @@
> > 
> >  #define VIRTIO_NET_F_CTRL_RX_EXTRA 20	/* Extra RX mode control support 
*/
> >  #define VIRTIO_NET_F_GUEST_ANNOUNCE 21	/* Guest can announce device on
> >  the
> >  
> >  					 * network */
> > 
> > +#define VIRTIO_NET_F_RFS	22	/* Device supports multiple TXQ/RXQ */
> 
> Should be
> /* Device supports Receive Flow Steering. */

Ok.
> 
> >  #define VIRTIO_NET_S_LINK_UP	1	/* Link is up */
> >  #define VIRTIO_NET_S_ANNOUNCE	2	/* Announcement is needed */
> > 
> > @@ -60,6 +61,8 @@ struct virtio_net_config {
> > 
> >  	__u8 mac[6];
> >  	/* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
> >  	__u16 status;
> > 
> > +	/* Total number of RX/TX queues */
> 
> Better comment:
> /* Maximum number of each of transmit and receive queues;
>  * see VIRTIO_NET_F_RFS and VIRTIO_NET_CTRL_RFS.
>  * Legal values are between 1 and 0x8000
>  */

Sure.
> 
> > +	__u16 max_virtqueue_pairs;
> > 
> >  } __attribute__((packed));
> >  
> >  /* This is the first element of the scatter-gather list.  If you don't
> > 
> > @@ -166,4 +169,17 @@ struct virtio_net_ctrl_mac {
> > 
> >  #define VIRTIO_NET_CTRL_ANNOUNCE       3
> >  
> >   #define VIRTIO_NET_CTRL_ANNOUNCE_ACK         0
> > 
> > +/*
> > + * Control multiqueue
> 
> Here's a better comment:
> 
> Control Receive Flow Steering
> 
>  The command VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET
>  enables Receive Flow Steering, specifying the number of the transmit and
> receive queues that will be used.
>  After the command is consumed and acked by the device,
>  the device will not steer new packets on receive virtqueues
>  other than specified nor read from transmit virtqueues other than
> specified. Accordingly, driver should not transmit new packets
>  on virtqueues other than specified.
> 

Sure, thanks for the this reminding.
> > + *
> 
> Remove this empty line.

Ok.
> 
> > + */
> > +struct virtio_net_ctrl_rfs {
> 
> /* Number of each of transmit and receive queues to use;
>  * Legal values are between 1 and max_virtqueue_pairs
>  */
> 
> > +	u16 virtqueue_pairs;
> > +};
> > +
> > +#define VIRTIO_NET_CTRL_RFS   4
> > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_SET        0
> 
> /* Value range for max_virtqueue_pairsfor and virtqueue_pairs above */
> 
> > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MIN        1
> > + #define VIRTIO_NET_CTRL_RFS_VQ_PAIRS_MAX        0x8000
> > +
> > 
> >  #endif /* _LINUX_VIRTIO_NET_H */
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [net-next PATCH V3-evictor] net: frag evictor,avoid killing warm frag queues
From: David Laight @ 2012-12-04 14:32 UTC (permalink / raw)
  To: Jesper Dangaard Brouer, Eric Dumazet, David S. Miller,
	Florian Westphal
  Cc: netdev, Thomas Graf, Paul E. McKenney, Cong Wang, Herbert Xu
In-Reply-To: <20121204133007.20215.52566.stgit@dragon>

> The fragmentation evictor system have a very unfortunate eviction
> system for killing fragment, when the system is put under pressure.
> 
> If packets are coming in too fast, the evictor code kills "warm"
> fragments too quickly.  Resulting in close to zero throughput, as
> fragments are killed before they have a chance to complete
> 
> This is related to the bad interaction with the LRU (Least Recently
> Used) list.  Under load the LRU list sort-of changes meaning/behavior.
> When the LRU head is very new/warm, then the head is most likely the
> one with most fragments and the tail (latest used or added element)
> with least.
> 
> Solved by, introducing a creation "jiffie" timestamp (creation_ts).
> If the element is tried evicted in same jiffie, then perform tail drop
> on the LRU list instead.

I'm not at all sure a 'same tick' test is actually sensible.

Some quick 'ball park' maths:
10Mbps is about 1kB per jiffie (1kHz tick).
So at 10Mbps you'll see a frame every 1.5 jiffies.
So if the source of any fragment stream is some remote ADSL
connection (rather than a local Ge LAN connection) you'll
never manage to assemble the entire datagram.
Anyone connecting from dialup will fail miserably.

This means you need to keep some initial fragments for much
longer than 1 jiffie before discarding them.

Anything with more than one in-sequence fragment is a good
candidate for keeping (especially if the inter-fragment time
is being monitored).

Anything with out of sequence fragments is a very good choice
for discard.

But yes, discarding some 'new' fragments is very likely necessary
in order to manage to actually receive anything.

Sometimes I've wondered whether discarding some transmits would
help when the rx side is congested (and v.v.). I've never tried
modelling that one with any traffic flows! But forcing the remote
to timeout might reduce the overall traffic, trouble is the
discards would have to be carefully chosen!
(I was thinking about this mainly with regard to routers that
are having issues feeding outbound data into a narrow pipe.)

	David

^ permalink raw reply

* [PATCH 7/7] qlcnic: rename module params with module_param_named
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Add qlcnic prefix to qlcnic driver module parameters.

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   32 +++++++++++-----------
 1 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index d94d3e9..8b566c2 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -34,21 +34,21 @@ static int qlcnic_mac_learn;
 module_param(qlcnic_mac_learn, int, 0444);
 MODULE_PARM_DESC(qlcnic_mac_learn, "Mac Filter (0=disabled, 1=enabled)");
 
-static int use_msi = 1;
-module_param(use_msi, int, 0444);
+static int qlcnic_use_msi = 1;
 MODULE_PARM_DESC(use_msi, "MSI interrupt (0=disabled, 1=enabled");
+module_param_named(use_msi, qlcnic_use_msi, int, 0444);
 
-static int use_msi_x = 1;
-module_param(use_msi_x, int, 0444);
+static int qlcnic_use_msi_x = 1;
 MODULE_PARM_DESC(use_msi_x, "MSI-X interrupt (0=disabled, 1=enabled");
+module_param_named(use_msi_x, qlcnic_use_msi_x, int, 0444);
 
-static int auto_fw_reset = 1;
-module_param(auto_fw_reset, int, 0644);
+static int qlcnic_auto_fw_reset = 1;
 MODULE_PARM_DESC(auto_fw_reset, "Auto firmware reset (0=disabled, 1=enabled");
+module_param_named(auto_fw_reset, qlcnic_auto_fw_reset, int, 0644);
 
-static int load_fw_file;
-module_param(load_fw_file, int, 0444);
+static int qlcnic_load_fw_file;
 MODULE_PARM_DESC(load_fw_file, "Load firmware from (0=flash, 1=file");
+module_param_named(load_fw_file, qlcnic_load_fw_file, int, 0444);
 
 static int qlcnic_config_npars;
 module_param(qlcnic_config_npars, int, 0444);
@@ -317,7 +317,7 @@ static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter)
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 	struct pci_dev *pdev = adapter->pdev;
 
-	if (use_msi && !pci_enable_msi(pdev)) {
+	if (qlcnic_use_msi && !pci_enable_msi(pdev)) {
 		adapter->flags |= QLCNIC_MSI_ENABLED;
 		offset = msi_tgt_status[adapter->ahw->pci_func];
 		adapter->tgt_status_reg = qlcnic_get_ioaddr(adapter->ahw,
@@ -631,7 +631,7 @@ qlcnic_check_options(struct qlcnic_adapter *adapter)
 		adapter->max_rxd = MAX_RCV_DESCRIPTORS_1G;
 	}
 
-	adapter->ahw->msix_supported = !!use_msi_x;
+	adapter->ahw->msix_supported = !!qlcnic_use_msi_x;
 
 	adapter->num_txd = MAX_CMD_DESCRIPTORS;
 
@@ -961,7 +961,7 @@ qlcnic_start_firmware(struct qlcnic_adapter *adapter)
 	else if (!err)
 		goto check_fw_status;
 
-	if (load_fw_file)
+	if (qlcnic_load_fw_file)
 		qlcnic_request_firmware(adapter);
 	else {
 		err = qlcnic_check_flash_fw_ver(adapter);
@@ -2558,7 +2558,7 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
 		if (adapter->need_fw_reset)
 			goto detach;
 
-		if (adapter->ahw->reset_context && auto_fw_reset) {
+		if (adapter->ahw->reset_context && qlcnic_auto_fw_reset) {
 			qlcnic_reset_hw_context(adapter);
 			adapter->netdev->trans_start = jiffies;
 		}
@@ -2573,7 +2573,7 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
 
 	qlcnic_dev_request_reset(adapter);
 
-	if (auto_fw_reset)
+	if (qlcnic_auto_fw_reset)
 		clear_bit(__QLCNIC_FW_ATTACHED, &adapter->state);
 
 	dev_err(&adapter->pdev->dev, "firmware hang detected\n");
@@ -2598,8 +2598,8 @@ detach:
 	adapter->dev_state = (state == QLCNIC_DEV_NEED_QUISCENT) ? state :
 		QLCNIC_DEV_NEED_RESET;
 
-	if (auto_fw_reset &&
-		!test_and_set_bit(__QLCNIC_RESETTING, &adapter->state)) {
+	if (qlcnic_auto_fw_reset && !test_and_set_bit(__QLCNIC_RESETTING,
+						      &adapter->state)) {
 
 		qlcnic_schedule_work(adapter, qlcnic_detach_work, 0);
 		QLCDB(adapter, DRV, "fw recovery scheduled.\n");
@@ -2784,7 +2784,7 @@ qlcnicvf_start_firmware(struct qlcnic_adapter *adapter)
 
 int qlcnic_validate_max_rss(struct net_device *netdev, u8 max_hw, u8 val)
 {
-	if (!use_msi_x && !use_msi) {
+	if (!qlcnic_use_msi_x && !qlcnic_use_msi) {
 		netdev_info(netdev, "no msix or msi support, hence no rss\n");
 		return -EINVAL;
 	}
-- 
1.7.1

^ permalink raw reply related

* [PATCH 5/7] qlcnic: update NIC partition interface routines
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Refactor 82xx driver to support new adapter
Update routines to support variable number of NIC partitions

Signed-off-by: Rajesh Borundia <rajesh.borundia@qlogic.com>
Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h      |    1 +
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c  |   39 +++++++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   97 +++++++++++++---------
 3 files changed, 89 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 4d85c70..5379024 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1051,6 +1051,7 @@ struct qlcnic_npar_info {
 	u8	mac_anti_spoof;
 	u8	promisc_mode;
 	u8	offload_flags;
+	u8      pci_func;
 };
 
 struct qlcnic_eswitch {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index c5cfbaa..58f094c 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -7,6 +7,18 @@
 
 #include "qlcnic.h"
 
+static int qlcnic_is_valid_nic_func(struct qlcnic_adapter *adapter, u8 pci_func)
+{
+	int i;
+
+	for (i = 0; i < adapter->ahw->act_pci_func; i++) {
+		if (adapter->npars[i].pci_func == pci_func)
+			return i;
+	}
+
+	return -1;
+}
+
 static u32
 qlcnic_poll_rsp(struct qlcnic_adapter *adapter)
 {
@@ -817,11 +829,14 @@ int qlcnic_get_pci_info(struct qlcnic_adapter *adapter,
 	qlcnic_issue_cmd(adapter, &cmd);
 	err = cmd.rsp.cmd;
 
+	adapter->ahw->act_pci_func = 0;
 	if (err == QLCNIC_RCODE_SUCCESS) {
 		for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++, npar++, pci_info++) {
 			pci_info->id = le16_to_cpu(npar->id);
 			pci_info->active = le16_to_cpu(npar->active);
 			pci_info->type = le16_to_cpu(npar->type);
+			if (pci_info->type == QLCNIC_TYPE_NIC)
+				adapter->ahw->act_pci_func++;
 			pci_info->default_port =
 				le16_to_cpu(npar->default_port);
 			pci_info->tx_min_bw =
@@ -1016,12 +1031,13 @@ int qlcnic_get_eswitch_stats(struct qlcnic_adapter *adapter, const u8 eswitch,
 	esw_stats->numbytes = QLCNIC_STATS_NOT_AVAIL;
 	esw_stats->context_id = eswitch;
 
-	for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
+	for (i = 0; i < adapter->ahw->act_pci_func; i++) {
 		if (adapter->npars[i].phy_port != eswitch)
 			continue;
 
 		memset(&port_stats, 0, sizeof(struct __qlcnic_esw_statistics));
-		if (qlcnic_get_port_stats(adapter, i, rx_tx, &port_stats))
+		if (qlcnic_get_port_stats(adapter, adapter->npars[i].pci_func,
+					  rx_tx, &port_stats))
 			continue;
 
 		esw_stats->size = port_stats.size;
@@ -1120,7 +1136,7 @@ op_type = 1 for port vlan_id
 int qlcnic_config_switch_port(struct qlcnic_adapter *adapter,
 		struct qlcnic_esw_func_cfg *esw_cfg)
 {
-	int err = -EIO;
+	int err = -EIO, index;
 	u32 arg1, arg2 = 0;
 	struct qlcnic_cmd_args cmd;
 	u8 pci_func;
@@ -1128,7 +1144,10 @@ int qlcnic_config_switch_port(struct qlcnic_adapter *adapter,
 	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return err;
 	pci_func = esw_cfg->pci_func;
-	arg1 = (adapter->npars[pci_func].phy_port & BIT_0);
+	index = qlcnic_is_valid_nic_func(adapter, pci_func);
+	if (index < 0)
+		return err;
+	arg1 = (adapter->npars[index].phy_port & BIT_0);
 	arg1 |= (pci_func << 8);
 
 	if (__qlcnic_get_eswitch_port_config(adapter, &arg1, &arg2))
@@ -1192,11 +1211,17 @@ qlcnic_get_eswitch_port_config(struct qlcnic_adapter *adapter,
 			struct qlcnic_esw_func_cfg *esw_cfg)
 {
 	u32 arg1, arg2;
+	int index;
 	u8 phy_port;
-	if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC)
-		phy_port = adapter->npars[esw_cfg->pci_func].phy_port;
-	else
+
+	if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC) {
+		index = qlcnic_is_valid_nic_func(adapter, esw_cfg->pci_func);
+		if (index < 0)
+			return -EIO;
+		phy_port = adapter->npars[index].phy_port;
+	} else {
 		phy_port = adapter->ahw->physical_port;
+	}
 	arg1 = phy_port;
 	arg1 |= (esw_cfg->pci_func << 8);
 	if (__qlcnic_get_eswitch_port_config(adapter, &arg1, &arg2))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index bfcd004..d94d3e9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -91,6 +91,9 @@ static void qlcnic_set_netdev_features(struct qlcnic_adapter *,
 static int qlcnic_vlan_rx_add(struct net_device *, u16);
 static int qlcnic_vlan_rx_del(struct net_device *, u16);
 
+#define QLCNIC_IS_TSO_CAPABLE(adapter)	\
+	((adapter)->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO)
+
 /*  PCI Device ID Table  */
 #define ENTRY(device) \
 	{PCI_DEVICE(PCI_VENDOR_ID_QLOGIC, (device)), \
@@ -369,19 +372,25 @@ qlcnic_cleanup_pci_map(struct qlcnic_adapter *adapter)
 		iounmap(adapter->ahw->pci_base0);
 }
 
-static int
-qlcnic_init_pci_info(struct qlcnic_adapter *adapter)
+static int qlcnic_init_pci_info(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_pci_info *pci_info;
-	int i, ret = 0;
+	int i, ret = 0, j = 0;
+	u16 act_pci_func;
 	u8 pfn;
 
 	pci_info = kcalloc(QLCNIC_MAX_PCI_FUNC, sizeof(*pci_info), GFP_KERNEL);
 	if (!pci_info)
 		return -ENOMEM;
 
+	ret = qlcnic_get_pci_info(adapter, pci_info);
+	if (ret)
+		goto err_pci_info;
+
+	act_pci_func = adapter->ahw->act_pci_func;
+
 	adapter->npars = kzalloc(sizeof(struct qlcnic_npar_info) *
-				QLCNIC_MAX_PCI_FUNC, GFP_KERNEL);
+				 act_pci_func, GFP_KERNEL);
 	if (!adapter->npars) {
 		ret = -ENOMEM;
 		goto err_pci_info;
@@ -394,21 +403,25 @@ qlcnic_init_pci_info(struct qlcnic_adapter *adapter)
 		goto err_npars;
 	}
 
-	ret = qlcnic_get_pci_info(adapter, pci_info);
-	if (ret)
-		goto err_eswitch;
-
 	for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
 		pfn = pci_info[i].id;
+
 		if (pfn >= QLCNIC_MAX_PCI_FUNC) {
 			ret = QL_STATUS_INVALID_PARAM;
 			goto err_eswitch;
 		}
-		adapter->npars[pfn].active = (u8)pci_info[i].active;
-		adapter->npars[pfn].type = (u8)pci_info[i].type;
-		adapter->npars[pfn].phy_port = (u8)pci_info[i].default_port;
-		adapter->npars[pfn].min_bw = pci_info[i].tx_min_bw;
-		adapter->npars[pfn].max_bw = pci_info[i].tx_max_bw;
+
+		if (!pci_info[i].active ||
+		    (pci_info[i].type != QLCNIC_TYPE_NIC))
+			continue;
+
+		adapter->npars[j].pci_func = pfn;
+		adapter->npars[j].active = (u8)pci_info[i].active;
+		adapter->npars[j].type = (u8)pci_info[i].type;
+		adapter->npars[j].phy_port = (u8)pci_info[i].default_port;
+		adapter->npars[j].min_bw = pci_info[i].tx_min_bw;
+		adapter->npars[j].max_bw = pci_info[i].tx_max_bw;
+		j++;
 	}
 
 	for (i = 0; i < QLCNIC_NIU_MAX_XG_PORTS; i++)
@@ -436,7 +449,7 @@ qlcnic_set_function_modes(struct qlcnic_adapter *adapter)
 	u32 ref_count;
 	int i, ret = 1;
 	u32 data = QLCNIC_MGMT_FUNC;
-	void __iomem *priv_op = adapter->ahw->pci_base0 + QLCNIC_DRV_OP_MODE;
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
 
 	/* If other drivers are not in use set their privilege level */
 	ref_count = QLCRD32(adapter, QLCNIC_CRB_DRV_ACTIVE);
@@ -445,21 +458,20 @@ qlcnic_set_function_modes(struct qlcnic_adapter *adapter)
 		goto err_lock;
 
 	if (qlcnic_config_npars) {
-		for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
-			id = i;
-			if (adapter->npars[i].type != QLCNIC_TYPE_NIC ||
-				id == adapter->ahw->pci_func)
+		for (i = 0; i < ahw->act_pci_func; i++) {
+			id = adapter->npars[i].pci_func;
+			if (id == ahw->pci_func)
 				continue;
 			data |= (qlcnic_config_npars &
 					QLC_DEV_SET_DRV(0xf, id));
 		}
 	} else {
-		data = readl(priv_op);
-		data = (data & ~QLC_DEV_SET_DRV(0xf, adapter->ahw->pci_func)) |
+		data = QLCRD32(adapter, QLCNIC_DRV_OP_MODE);
+		data = (data & ~QLC_DEV_SET_DRV(0xf, ahw->pci_func)) |
 			(QLC_DEV_SET_DRV(QLCNIC_MGMT_FUNC,
-			adapter->ahw->pci_func));
+					 ahw->pci_func));
 	}
-	writel(data, priv_op);
+	QLCWR32(adapter, QLCNIC_DRV_OP_MODE, data);
 	qlcnic_api_unlock(adapter);
 err_lock:
 	return ret;
@@ -632,6 +644,7 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter)
 	int err;
 	struct qlcnic_info nic_info;
 
+	memset(&nic_info, 0, sizeof(struct qlcnic_info));
 	err = qlcnic_get_nic_info(adapter, &nic_info, adapter->ahw->pci_func);
 	if (err)
 		return err;
@@ -798,8 +811,7 @@ qlcnic_check_eswitch_mode(struct qlcnic_adapter *adapter)
 	return err;
 }
 
-static int
-qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter)
+static int qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter)
 {
 	struct qlcnic_esw_func_cfg esw_cfg;
 	struct qlcnic_npar_info *npar;
@@ -808,16 +820,16 @@ qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter)
 	if (adapter->need_fw_reset)
 		return 0;
 
-	for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
-		if (adapter->npars[i].type != QLCNIC_TYPE_NIC)
-			continue;
+	for (i = 0; i < adapter->ahw->act_pci_func; i++) {
 		memset(&esw_cfg, 0, sizeof(struct qlcnic_esw_func_cfg));
-		esw_cfg.pci_func = i;
-		esw_cfg.offload_flags = BIT_0;
+		esw_cfg.pci_func = adapter->npars[i].pci_func;
 		esw_cfg.mac_override = BIT_0;
 		esw_cfg.promisc_mode = BIT_0;
-		if (adapter->ahw->capabilities  & QLCNIC_FW_CAPABILITY_TSO)
-			esw_cfg.offload_flags |= (BIT_1 | BIT_2);
+		if (qlcnic_82xx_check(adapter)) {
+			esw_cfg.offload_flags = BIT_0;
+			if (QLCNIC_IS_TSO_CAPABLE(adapter))
+				esw_cfg.offload_flags |= (BIT_1 | BIT_2);
+		}
 		if (qlcnic_config_switch_port(adapter, &esw_cfg))
 			return -EIO;
 		npar = &adapter->npars[i];
@@ -855,22 +867,24 @@ qlcnic_reset_eswitch_config(struct qlcnic_adapter *adapter,
 	return 0;
 }
 
-static int
-qlcnic_reset_npar_config(struct qlcnic_adapter *adapter)
+static int qlcnic_reset_npar_config(struct qlcnic_adapter *adapter)
 {
 	int i, err;
 	struct qlcnic_npar_info *npar;
 	struct qlcnic_info nic_info;
+	u8 pci_func;
 
-	if (!adapter->need_fw_reset)
-		return 0;
+	if (qlcnic_82xx_check(adapter))
+		if (!adapter->need_fw_reset)
+			return 0;
 
 	/* Set the NPAR config data after FW reset */
-	for (i = 0; i < QLCNIC_MAX_PCI_FUNC; i++) {
+	for (i = 0; i < adapter->ahw->act_pci_func; i++) {
 		npar = &adapter->npars[i];
-		if (npar->type != QLCNIC_TYPE_NIC)
-			continue;
-		err = qlcnic_get_nic_info(adapter, &nic_info, i);
+		pci_func = npar->pci_func;
+		memset(&nic_info, 0, sizeof(struct qlcnic_info));
+		err = qlcnic_get_nic_info(adapter,
+					  &nic_info, pci_func);
 		if (err)
 			return err;
 		nic_info.min_tx_bw = npar->min_bw;
@@ -881,11 +895,12 @@ qlcnic_reset_npar_config(struct qlcnic_adapter *adapter)
 
 		if (npar->enable_pm) {
 			err = qlcnic_config_port_mirroring(adapter,
-							npar->dest_npar, 1, i);
+							   npar->dest_npar, 1,
+							   pci_func);
 			if (err)
 				return err;
 		}
-		err = qlcnic_reset_eswitch_config(adapter, npar, i);
+		err = qlcnic_reset_eswitch_config(adapter, npar, pci_func);
 		if (err)
 			return err;
 	}
-- 
1.7.1

^ permalink raw reply related

* [PATCH 6/7] qlcnic: fix bug in LRO descriptor access macro
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index 880a9ca..6f82812 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -66,7 +66,7 @@
 	(((sts_data) >> 58) & 0x03F)
 
 #define qlcnic_get_lro_sts_refhandle(sts_data) 	\
-	((sts_data) & 0x0FFFF)
+	((sts_data) & 0x07FFF)
 #define qlcnic_get_lro_sts_length(sts_data)	\
 	(((sts_data) >> 16) & 0x0FFFF)
 #define qlcnic_get_lro_sts_l2_hdr_offset(sts_data)	\
-- 
1.7.1

^ permalink raw reply related

* [PATCH 4/7] qlcnic: get board name API
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Cleanup get board information API.

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h      |   26 +---------------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   35 ++++++++++++++++++----
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 528d88c..4d85c70 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1444,7 +1444,7 @@ void qlcnic_set_eswitch_port_features(struct qlcnic_adapter *,
  */
 
 #define QLCNIC_MAX_BOARD_NAME_LEN 100
-struct qlcnic_brdinfo {
+struct qlcnic_board_info {
 	unsigned short  vendor;
 	unsigned short  device;
 	unsigned short  sub_vendor;
@@ -1452,30 +1452,6 @@ struct qlcnic_brdinfo {
 	char short_name[QLCNIC_MAX_BOARD_NAME_LEN];
 };
 
-static const struct qlcnic_brdinfo qlcnic_boards[] = {
-	{0x1077, 0x8020, 0x1077, 0x203,
-		"8200 Series Single Port 10GbE Converged Network Adapter "
-		"(TCP/IP Networking)"},
-	{0x1077, 0x8020, 0x1077, 0x207,
-		"8200 Series Dual Port 10GbE Converged Network Adapter "
-		"(TCP/IP Networking)"},
-	{0x1077, 0x8020, 0x1077, 0x20b,
-		"3200 Series Dual Port 10Gb Intelligent Ethernet Adapter"},
-	{0x1077, 0x8020, 0x1077, 0x20c,
-		"3200 Series Quad Port 1Gb Intelligent Ethernet Adapter"},
-	{0x1077, 0x8020, 0x1077, 0x20f,
-		"3200 Series Single Port 10Gb Intelligent Ethernet Adapter"},
-	{0x1077, 0x8020, 0x103c, 0x3733,
-		"NC523SFP 10Gb 2-port Server Adapter"},
-	{0x1077, 0x8020, 0x103c, 0x3346,
-		"CN1000Q Dual Port Converged Network Adapter"},
-	{0x1077, 0x8020, 0x1077, 0x210,
-		"QME8242-k 10GbE Dual Port Mezzanine Card"},
-	{0x1077, 0x8020, 0x0, 0x0, "cLOM8214 1/10GbE Controller"},
-};
-
-#define NUM_SUPPORTED_BOARDS ARRAY_SIZE(qlcnic_boards)
-
 static inline u32 qlcnic_tx_avail(struct qlcnic_host_tx_ring *tx_ring)
 {
 	if (likely(tx_ring->producer < tx_ring->sw_consumer))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 80030af..bfcd004 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -118,6 +118,30 @@ static const u32 msi_tgt_status[8] = {
 	ISR_INT_TARGET_STATUS_F6, ISR_INT_TARGET_STATUS_F7
 };
 
+static const struct qlcnic_board_info qlcnic_boards[] = {
+	{0x1077, 0x8020, 0x1077, 0x203,
+	 "8200 Series Single Port 10GbE Converged Network Adapter"
+	 "(TCP/IP Networking)"},
+	{0x1077, 0x8020, 0x1077, 0x207,
+	 "8200 Series Dual Port 10GbE Converged Network Adapter"
+	 "(TCP/IP Networking)"},
+	{0x1077, 0x8020, 0x1077, 0x20b,
+	 "3200 Series Dual Port 10Gb Intelligent Ethernet Adapter"},
+	{0x1077, 0x8020, 0x1077, 0x20c,
+	 "3200 Series Quad Port 1Gb Intelligent Ethernet Adapter"},
+	{0x1077, 0x8020, 0x1077, 0x20f,
+	 "3200 Series Single Port 10Gb Intelligent Ethernet Adapter"},
+	{0x1077, 0x8020, 0x103c, 0x3733,
+	 "NC523SFP 10Gb 2-port Server Adapter"},
+	{0x1077, 0x8020, 0x103c, 0x3346,
+	 "CN1000Q Dual Port Converged Network Adapter"},
+	{0x1077, 0x8020, 0x1077, 0x210,
+	 "QME8242-k 10GbE Dual Port Mezzanine Card"},
+	{0x1077, 0x8020, 0x0, 0x0, "cLOM8214 1/10GbE Controller"},
+};
+
+#define NUM_SUPPORTED_BOARDS ARRAY_SIZE(qlcnic_boards)
+
 static const
 struct qlcnic_legacy_intr_set legacy_intr[] = QLCNIC_LEGACY_INTR_CONFIG;
 
@@ -525,7 +549,7 @@ static int qlcnic_setup_pci_map(struct pci_dev *pdev,
 	return 0;
 }
 
-static void get_brd_name(struct qlcnic_adapter *adapter, char *name)
+static void qlcnic_get_board_name(struct qlcnic_adapter *adapter, char *name)
 {
 	struct pci_dev *pdev = adapter->pdev;
 	int i, found = 0;
@@ -1467,7 +1491,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	struct qlcnic_adapter *adapter = NULL;
 	int err, pci_using_dac = -1;
 	uint8_t revision_id;
-	char brd_name[QLCNIC_MAX_BOARD_NAME_LEN];
+	char board_name[QLCNIC_MAX_BOARD_NAME_LEN];
 
 	err = pci_enable_device(pdev);
 	if (err)
@@ -1547,11 +1571,10 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_warn(&pdev->dev, "failed to read mac addr\n");
 
 	if (adapter->portnum == 0) {
-		get_brd_name(adapter, brd_name);
-
+		qlcnic_get_board_name(adapter, board_name);
 		pr_info("%s: %s Board Chip rev 0x%x\n",
-				module_name(THIS_MODULE),
-				brd_name, adapter->ahw->revision_id);
+			module_name(THIS_MODULE),
+			board_name, adapter->ahw->revision_id);
 	}
 
 	qlcnic_clear_stats(adapter);
-- 
1.7.1

^ permalink raw reply related

* [PATCH 3/7] qlcnic: modify PCI and register access routines
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Refactor 82xx driver to support new adapter
Update PCI and hardware access routines

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h      |    4 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h  |   40 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c   |  277 +++++++++++++---------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   63 +++---
 4 files changed, 222 insertions(+), 162 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 352a1e4..528d88c 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1279,7 +1279,7 @@ struct qlcnic_cmd_args {
 int qlcnic_fw_cmd_get_minidump_temp(struct qlcnic_adapter *adapter);
 int qlcnic_fw_cmd_set_port(struct qlcnic_adapter *adapter, u32 config);
 
-u32 qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off);
+int qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off);
 int qlcnic_hw_write_wx_2M(struct qlcnic_adapter *, ulong off, u32 data);
 int qlcnic_pci_mem_write_2M(struct qlcnic_adapter *, u64 off, u64 data);
 int qlcnic_pci_mem_read_2M(struct qlcnic_adapter *, u64 off, u64 *data);
@@ -1345,7 +1345,7 @@ int qlcnic_rom_fast_read_words(struct qlcnic_adapter *adapter, int addr,
 int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter);
 void qlcnic_free_sw_resources(struct qlcnic_adapter *adapter);
 
-void __iomem *qlcnic_get_ioaddr(struct qlcnic_adapter *, u32);
+void __iomem *qlcnic_get_ioaddr(struct qlcnic_hardware_context *, u32);
 
 int qlcnic_alloc_hw_resources(struct qlcnic_adapter *adapter);
 void qlcnic_free_hw_resources(struct qlcnic_adapter *adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h
index bd5030e..49cc1ac 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hdr.h
@@ -816,55 +816,63 @@ enum {
 #define LSD(x)  ((uint32_t)((uint64_t)(x)))
 #define MSD(x)  ((uint32_t)((((uint64_t)(x)) >> 16) >> 16))
 
+#define QLCNIC_MS_CTRL			0x41000090
+#define QLCNIC_MS_ADDR_LO		0x41000094
+#define QLCNIC_MS_ADDR_HI		0x41000098
+#define QLCNIC_MS_WRTDATA_LO		0x410000A0
+#define QLCNIC_MS_WRTDATA_HI		0x410000A4
+#define QLCNIC_MS_WRTDATA_ULO		0x410000B0
+#define QLCNIC_MS_WRTDATA_UHI		0x410000B4
+#define QLCNIC_MS_RDDATA_LO		0x410000A8
+#define QLCNIC_MS_RDDATA_HI		0x410000AC
+#define QLCNIC_MS_RDDATA_ULO		0x410000B8
+#define QLCNIC_MS_RDDATA_UHI		0x410000BC
+
+#define QLCNIC_TA_WRITE_ENABLE	(TA_CTL_ENABLE | TA_CTL_WRITE)
+#define QLCNIC_TA_WRITE_START	(TA_CTL_START | TA_CTL_ENABLE | TA_CTL_WRITE)
+#define QLCNIC_TA_START_ENABLE	(TA_CTL_START | TA_CTL_ENABLE)
+
 #define	QLCNIC_LEGACY_INTR_CONFIG					\
 {									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F0,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS,		\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(0) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK, },		\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F1,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F1,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F1,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(1) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F1, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F2,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F2,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F2,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(2) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F2, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F3,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F3,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F3,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(3) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F3, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F4,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F4,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F4,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(4) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F4, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F5,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F5,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F5,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(5) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F5, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F6,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F6,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F6,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(6) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F6, },	\
 									\
 	{								\
 		.int_vec_bit	=	PCIX_INT_VECTOR_BIT_F7,		\
 		.tgt_status_reg	=	ISR_INT_TARGET_STATUS_F7,	\
-		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F7,		\
-		.pci_int_reg	=	ISR_MSI_INT_TRIGGER(7) },	\
+		.tgt_mask_reg	=	ISR_INT_TARGET_MASK_F7, },	\
 }
 
 /* NIU REGS */
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 382c6ac..fc48e00 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -6,6 +6,7 @@
  */
 
 #include "qlcnic.h"
+#include "qlcnic_hdr.h"
 
 #include <linux/slab.h>
 #include <net/ip.h>
@@ -22,6 +23,15 @@
 #define CRB_HI(off)	((crb_hub_agt[CRB_BLK(off)] << 20) | ((off) & 0xf0000))
 #define CRB_INDIRECT_2M	(0x1e0000UL)
 
+struct qlcnic_ms_reg_ctrl {
+	u32 ocm_window;
+	u32 control;
+	u32 hi;
+	u32 low;
+	u32 rd[4];
+	u32 wd[4];
+	u64 off;
+};
 
 #ifndef readq
 static inline u64 readq(void __iomem *addr)
@@ -266,10 +276,44 @@ static const unsigned crb_hub_agt[64] = {
 	0,
 };
 
+static const u32 msi_tgt_status[8] = {
+	ISR_INT_TARGET_STATUS, ISR_INT_TARGET_STATUS_F1,
+	ISR_INT_TARGET_STATUS_F2, ISR_INT_TARGET_STATUS_F3,
+	ISR_INT_TARGET_STATUS_F4, ISR_INT_TARGET_STATUS_F5,
+	ISR_INT_TARGET_STATUS_F6, ISR_INT_TARGET_STATUS_F7
+};
+
 /*  PCI Windowing for DDR regions.  */
 
 #define QLCNIC_PCIE_SEM_TIMEOUT	10000
 
+static void qlcnic_read_window_reg(u32 addr, void __iomem *bar0, u32 *data)
+{
+	u32 dest;
+	void __iomem *val;
+
+	dest = addr & 0xFFFF0000;
+	val = bar0 + QLCNIC_FW_DUMP_REG1;
+	writel(dest, val);
+	readl(val);
+	val = bar0 + QLCNIC_FW_DUMP_REG2 + LSW(addr);
+	*data = readl(val);
+}
+
+static void qlcnic_write_window_reg(u32 addr, void __iomem *bar0, u32 data)
+{
+	u32 dest;
+	void __iomem *val;
+
+	dest = addr & 0xFFFF0000;
+	val = bar0 + QLCNIC_FW_DUMP_REG1;
+	writel(dest, val);
+	readl(val);
+	val = bar0 + QLCNIC_FW_DUMP_REG2 + LSW(addr);
+	writel(data, val);
+	readl(val);
+}
+
 int
 qlcnic_pcie_sem_lock(struct qlcnic_adapter *adapter, int sem, u32 id_reg)
 {
@@ -300,6 +344,23 @@ qlcnic_pcie_sem_unlock(struct qlcnic_adapter *adapter, int sem)
 	QLCRD32(adapter, QLCNIC_PCIE_REG(PCIE_SEM_UNLOCK(sem)));
 }
 
+static int qlcnic_ind_rd(struct qlcnic_adapter *adapter, u32 addr)
+{
+	u32 data;
+
+	if (qlcnic_82xx_check(adapter))
+		qlcnic_read_window_reg(addr, adapter->ahw->pci_base0, &data);
+	else
+		return -EIO;
+	return data;
+}
+
+static void qlcnic_ind_wr(struct qlcnic_adapter *adapter, u32 addr, u32 data)
+{
+	if (qlcnic_82xx_check(adapter))
+		qlcnic_write_window_reg(addr, adapter->ahw->pci_base0, data);
+}
+
 static int
 qlcnic_send_cmd_descs(struct qlcnic_adapter *adapter,
 		struct cmd_desc_type0 *cmd_desc_arr, int nr_desc)
@@ -863,9 +924,8 @@ int qlcnic_set_features(struct net_device *netdev, netdev_features_t features)
  *	 0 if no window access is needed. 'off' is set to 2M addr
  * In: 'off' is offset from base in 128M pci map
  */
-static int
-qlcnic_pci_get_crb_addr_2M(struct qlcnic_adapter *adapter,
-		ulong off, void __iomem **addr)
+static int qlcnic_pci_get_crb_addr_2M(struct qlcnic_hardware_context *ahw,
+				      ulong off, void __iomem **addr)
 {
 	const struct crb_128M_2M_sub_block_map *m;
 
@@ -880,7 +940,7 @@ qlcnic_pci_get_crb_addr_2M(struct qlcnic_adapter *adapter,
 	m = &crb_128M_2M_map[CRB_BLK(off)].sub_block[CRB_SUBBLK(off)];
 
 	if (m->valid && (m->start_128M <= off) && (m->end_128M > off)) {
-		*addr = adapter->ahw->pci_base0 + m->start_2M +
+		*addr = ahw->pci_base0 + m->start_2M +
 			(off - m->start_128M);
 		return 0;
 	}
@@ -888,7 +948,7 @@ qlcnic_pci_get_crb_addr_2M(struct qlcnic_adapter *adapter,
 	/*
 	 * Not in direct map, use crb window
 	 */
-	*addr = adapter->ahw->pci_base0 + CRB_INDIRECT_2M + (off & MASK(16));
+	*addr = ahw->pci_base0 + CRB_INDIRECT_2M + (off & MASK(16));
 	return 1;
 }
 
@@ -929,7 +989,7 @@ qlcnic_hw_write_wx_2M(struct qlcnic_adapter *adapter, ulong off, u32 data)
 	int rv;
 	void __iomem *addr = NULL;
 
-	rv = qlcnic_pci_get_crb_addr_2M(adapter, off, &addr);
+	rv = qlcnic_pci_get_crb_addr_2M(adapter->ahw, off, &addr);
 
 	if (rv == 0) {
 		writel(data, addr);
@@ -954,15 +1014,14 @@ qlcnic_hw_write_wx_2M(struct qlcnic_adapter *adapter, ulong off, u32 data)
 	return -EIO;
 }
 
-u32
-qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off)
+int qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off)
 {
 	unsigned long flags;
 	int rv;
 	u32 data = -1;
 	void __iomem *addr = NULL;
 
-	rv = qlcnic_pci_get_crb_addr_2M(adapter, off, &addr);
+	rv = qlcnic_pci_get_crb_addr_2M(adapter->ahw, off, &addr);
 
 	if (rv == 0)
 		return readl(addr);
@@ -985,46 +1044,28 @@ qlcnic_hw_read_wx_2M(struct qlcnic_adapter *adapter, ulong off)
 }
 
 
-void __iomem *
-qlcnic_get_ioaddr(struct qlcnic_adapter *adapter, u32 offset)
+void __iomem *qlcnic_get_ioaddr(struct qlcnic_hardware_context *ahw,
+				u32 offset)
 {
 	void __iomem *addr = NULL;
 
-	WARN_ON(qlcnic_pci_get_crb_addr_2M(adapter, offset, &addr));
+	WARN_ON(qlcnic_pci_get_crb_addr_2M(ahw, offset, &addr));
 
 	return addr;
 }
 
-
-static int
-qlcnic_pci_set_window_2M(struct qlcnic_adapter *adapter,
-		u64 addr, u32 *start)
-{
-	u32 window;
-
-	window = OCM_WIN_P3P(addr);
-
-	writel(window, adapter->ahw->ocm_win_crb);
-	/* read back to flush */
-	readl(adapter->ahw->ocm_win_crb);
-
-	*start = QLCNIC_PCI_OCM0_2M + GET_MEM_OFFS_2M(addr);
-	return 0;
-}
-
-static int
-qlcnic_pci_mem_access_direct(struct qlcnic_adapter *adapter, u64 off,
-		u64 *data, int op)
+static int qlcnic_pci_mem_access_direct(struct qlcnic_adapter *adapter,
+					u32 window, u64 off, u64 *data, int op)
 {
 	void __iomem *addr;
-	int ret;
 	u32 start;
 
 	mutex_lock(&adapter->ahw->mem_lock);
 
-	ret = qlcnic_pci_set_window_2M(adapter, off, &start);
-	if (ret != 0)
-		goto unlock;
+	writel(window, adapter->ahw->ocm_win_crb);
+	/* read back to flush */
+	readl(adapter->ahw->ocm_win_crb);
+	start = QLCNIC_PCI_OCM0_2M + off;
 
 	addr = adapter->ahw->pci_base0 + start;
 
@@ -1033,10 +1074,12 @@ qlcnic_pci_mem_access_direct(struct qlcnic_adapter *adapter, u64 off,
 	else		/* write */
 		writeq(*data, addr);
 
-unlock:
-	mutex_unlock(&adapter->ahw->mem_lock);
+	/* Set window to 0 */
+	writel(0, adapter->ahw->ocm_win_crb);
+	readl(adapter->ahw->ocm_win_crb);
 
-	return ret;
+	mutex_unlock(&adapter->ahw->mem_lock);
+	return 0;
 }
 
 void
@@ -1061,52 +1104,74 @@ qlcnic_pci_camqm_write_2M(struct qlcnic_adapter *adapter, u64 off, u64 data)
 	mutex_unlock(&adapter->ahw->mem_lock);
 }
 
-int
-qlcnic_pci_mem_write_2M(struct qlcnic_adapter *adapter,
-		u64 off, u64 data)
+
+
+/* Set MS memory control data for different adapters */
+static void qlcnic_set_ms_controls(struct qlcnic_adapter *adapter, u64 off,
+				   struct qlcnic_ms_reg_ctrl *ms)
+{
+	ms->control = QLCNIC_MS_CTRL;
+	ms->low = QLCNIC_MS_ADDR_LO;
+	ms->hi = QLCNIC_MS_ADDR_HI;
+	if (off & 0xf) {
+		ms->wd[0] = QLCNIC_MS_WRTDATA_LO;
+		ms->rd[0] = QLCNIC_MS_RDDATA_LO;
+		ms->wd[1] = QLCNIC_MS_WRTDATA_HI;
+		ms->rd[1] = QLCNIC_MS_RDDATA_HI;
+		ms->wd[2] = QLCNIC_MS_WRTDATA_ULO;
+		ms->wd[3] = QLCNIC_MS_WRTDATA_UHI;
+		ms->rd[2] = QLCNIC_MS_RDDATA_ULO;
+		ms->rd[3] = QLCNIC_MS_RDDATA_UHI;
+	} else {
+		ms->wd[0] = QLCNIC_MS_WRTDATA_ULO;
+		ms->rd[0] = QLCNIC_MS_RDDATA_ULO;
+		ms->wd[1] = QLCNIC_MS_WRTDATA_UHI;
+		ms->rd[1] = QLCNIC_MS_RDDATA_UHI;
+		ms->wd[2] = QLCNIC_MS_WRTDATA_LO;
+		ms->wd[3] = QLCNIC_MS_WRTDATA_HI;
+		ms->rd[2] = QLCNIC_MS_RDDATA_LO;
+		ms->rd[3] = QLCNIC_MS_RDDATA_HI;
+	}
+
+	ms->ocm_window = OCM_WIN_P3P(off);
+	ms->off = GET_MEM_OFFS_2M(off);
+}
+
+int qlcnic_pci_mem_write_2M(struct qlcnic_adapter *adapter, u64 off, u64 data)
 {
-	int i, j, ret;
+	int j, ret = 0;
 	u32 temp, off8;
-	void __iomem *mem_crb;
+	struct qlcnic_ms_reg_ctrl ms;
 
 	/* Only 64-bit aligned access */
 	if (off & 7)
 		return -EIO;
 
-	/* P3 onward, test agent base for MIU and SIU is same */
-	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
-				QLCNIC_ADDR_QDR_NET_MAX)) {
-		mem_crb = qlcnic_get_ioaddr(adapter,
-				QLCNIC_CRB_QDR_NET+MIU_TEST_AGT_BASE);
-		goto correct;
-	}
+	memset(&ms, 0, sizeof(struct qlcnic_ms_reg_ctrl));
+	if (!(ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
+			    QLCNIC_ADDR_QDR_NET_MAX) ||
+	      ADDR_IN_RANGE(off, QLCNIC_ADDR_DDR_NET,
+			    QLCNIC_ADDR_DDR_NET_MAX)))
+		return -EIO;
 
-	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_DDR_NET, QLCNIC_ADDR_DDR_NET_MAX)) {
-		mem_crb = qlcnic_get_ioaddr(adapter,
-				QLCNIC_CRB_DDR_NET+MIU_TEST_AGT_BASE);
-		goto correct;
-	}
+	qlcnic_set_ms_controls(adapter, off, &ms);
 
 	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_OCM0, QLCNIC_ADDR_OCM0_MAX))
-		return qlcnic_pci_mem_access_direct(adapter, off, &data, 1);
-
-	return -EIO;
+		return qlcnic_pci_mem_access_direct(adapter, ms.ocm_window,
+						    ms.off, &data, 1);
 
-correct:
 	off8 = off & ~0xf;
 
 	mutex_lock(&adapter->ahw->mem_lock);
 
-	writel(off8, (mem_crb + MIU_TEST_AGT_ADDR_LO));
-	writel(0, (mem_crb + MIU_TEST_AGT_ADDR_HI));
+	qlcnic_ind_wr(adapter, ms.low, off8);
+	qlcnic_ind_wr(adapter, ms.hi, 0);
 
-	i = 0;
-	writel(TA_CTL_ENABLE, (mem_crb + TEST_AGT_CTRL));
-	writel((TA_CTL_START | TA_CTL_ENABLE),
-			(mem_crb + TEST_AGT_CTRL));
+	qlcnic_ind_wr(adapter, ms.control, TA_CTL_ENABLE);
+	qlcnic_ind_wr(adapter, ms.control, QLCNIC_TA_START_ENABLE);
 
 	for (j = 0; j < MAX_CTL_CHECK; j++) {
-		temp = readl(mem_crb + TEST_AGT_CTRL);
+		temp = qlcnic_ind_rd(adapter, ms.control);
 		if ((temp & TA_CTL_BUSY) == 0)
 			break;
 	}
@@ -1116,24 +1181,18 @@ correct:
 		goto done;
 	}
 
-	i = (off & 0xf) ? 0 : 2;
-	writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i)),
-			mem_crb + MIU_TEST_AGT_WRDATA(i));
-	writel(readl(mem_crb + MIU_TEST_AGT_RDDATA(i+1)),
-			mem_crb + MIU_TEST_AGT_WRDATA(i+1));
-	i = (off & 0xf) ? 2 : 0;
-
-	writel(data & 0xffffffff,
-			mem_crb + MIU_TEST_AGT_WRDATA(i));
-	writel((data >> 32) & 0xffffffff,
-			mem_crb + MIU_TEST_AGT_WRDATA(i+1));
+	/* This is the modify part of read-modify-write */
+	qlcnic_ind_wr(adapter, ms.wd[0], qlcnic_ind_rd(adapter, ms.rd[0]));
+	qlcnic_ind_wr(adapter, ms.wd[1], qlcnic_ind_rd(adapter, ms.rd[1]));
+	/* This is the write part of read-modify-write */
+	qlcnic_ind_wr(adapter, ms.wd[2], data & 0xffffffff);
+	qlcnic_ind_wr(adapter, ms.wd[3], (data >> 32) & 0xffffffff);
 
-	writel((TA_CTL_ENABLE | TA_CTL_WRITE), (mem_crb + TEST_AGT_CTRL));
-	writel((TA_CTL_START | TA_CTL_ENABLE | TA_CTL_WRITE),
-			(mem_crb + TEST_AGT_CTRL));
+	qlcnic_ind_wr(adapter, ms.control, QLCNIC_TA_WRITE_ENABLE);
+	qlcnic_ind_wr(adapter, ms.control, QLCNIC_TA_WRITE_START);
 
 	for (j = 0; j < MAX_CTL_CHECK; j++) {
-		temp = readl(mem_crb + TEST_AGT_CTRL);
+		temp = qlcnic_ind_rd(adapter, ms.control);
 		if ((temp & TA_CTL_BUSY) == 0)
 			break;
 	}
@@ -1152,52 +1211,41 @@ done:
 	return ret;
 }
 
-int
-qlcnic_pci_mem_read_2M(struct qlcnic_adapter *adapter,
-		u64 off, u64 *data)
+int qlcnic_pci_mem_read_2M(struct qlcnic_adapter *adapter, u64 off, u64 *data)
 {
 	int j, ret;
 	u32 temp, off8;
 	u64 val;
-	void __iomem *mem_crb;
+	struct qlcnic_ms_reg_ctrl ms;
 
 	/* Only 64-bit aligned access */
 	if (off & 7)
 		return -EIO;
+	if (!(ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
+			    QLCNIC_ADDR_QDR_NET_MAX) ||
+	      ADDR_IN_RANGE(off, QLCNIC_ADDR_DDR_NET,
+			    QLCNIC_ADDR_DDR_NET_MAX)))
+		return -EIO;
 
-	/* P3 onward, test agent base for MIU and SIU is same */
-	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_QDR_NET,
-				QLCNIC_ADDR_QDR_NET_MAX)) {
-		mem_crb = qlcnic_get_ioaddr(adapter,
-				QLCNIC_CRB_QDR_NET+MIU_TEST_AGT_BASE);
-		goto correct;
-	}
-
-	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_DDR_NET, QLCNIC_ADDR_DDR_NET_MAX)) {
-		mem_crb = qlcnic_get_ioaddr(adapter,
-				QLCNIC_CRB_DDR_NET+MIU_TEST_AGT_BASE);
-		goto correct;
-	}
+	memset(&ms, 0, sizeof(struct qlcnic_ms_reg_ctrl));
+	qlcnic_set_ms_controls(adapter, off, &ms);
 
-	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_OCM0, QLCNIC_ADDR_OCM0_MAX)) {
-		return qlcnic_pci_mem_access_direct(adapter,
-				off, data, 0);
-	}
+	if (ADDR_IN_RANGE(off, QLCNIC_ADDR_OCM0, QLCNIC_ADDR_OCM0_MAX))
+		return qlcnic_pci_mem_access_direct(adapter, ms.ocm_window,
+						    ms.off, data, 0);
 
-	return -EIO;
+	mutex_lock(&adapter->ahw->mem_lock);
 
-correct:
 	off8 = off & ~0xf;
 
-	mutex_lock(&adapter->ahw->mem_lock);
+	qlcnic_ind_wr(adapter, ms.low, off8);
+	qlcnic_ind_wr(adapter, ms.hi, 0);
 
-	writel(off8, (mem_crb + MIU_TEST_AGT_ADDR_LO));
-	writel(0, (mem_crb + MIU_TEST_AGT_ADDR_HI));
-	writel(TA_CTL_ENABLE, (mem_crb + TEST_AGT_CTRL));
-	writel((TA_CTL_START | TA_CTL_ENABLE), (mem_crb + TEST_AGT_CTRL));
+	qlcnic_ind_wr(adapter, ms.control, TA_CTL_ENABLE);
+	qlcnic_ind_wr(adapter, ms.control, QLCNIC_TA_START_ENABLE);
 
 	for (j = 0; j < MAX_CTL_CHECK; j++) {
-		temp = readl(mem_crb + TEST_AGT_CTRL);
+		temp = qlcnic_ind_rd(adapter, ms.control);
 		if ((temp & TA_CTL_BUSY) == 0)
 			break;
 	}
@@ -1208,13 +1256,10 @@ correct:
 					"failed to read through agent\n");
 		ret = -EIO;
 	} else {
-		off8 = MIU_TEST_AGT_RDDATA_LO;
-		if (off & 0xf)
-			off8 = MIU_TEST_AGT_RDDATA_UPPER_LO;
 
-		temp = readl(mem_crb + off8 + 4);
+		temp = qlcnic_ind_rd(adapter, ms.rd[3]);
 		val = (u64)temp << 32;
-		val |= readl(mem_crb + off8);
+		val |= qlcnic_ind_rd(adapter, ms.rd[2]);
 		*data = val;
 		ret = 0;
 	}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 9a28735..80030af 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -283,32 +283,31 @@ static int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix)
 	return err;
 }
 
-
 static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter)
 {
+	u32 offset, mask_reg;
 	const struct qlcnic_legacy_intr_set *legacy_intrp;
+	struct qlcnic_hardware_context *ahw = adapter->ahw;
 	struct pci_dev *pdev = adapter->pdev;
 
 	if (use_msi && !pci_enable_msi(pdev)) {
 		adapter->flags |= QLCNIC_MSI_ENABLED;
-		adapter->tgt_status_reg = qlcnic_get_ioaddr(adapter,
-				msi_tgt_status[adapter->ahw->pci_func]);
+		offset = msi_tgt_status[adapter->ahw->pci_func];
+		adapter->tgt_status_reg = qlcnic_get_ioaddr(adapter->ahw,
+							    offset);
 		dev_info(&pdev->dev, "using msi interrupts\n");
 		adapter->msix_entries[0].vector = pdev->irq;
 		return;
 	}
 
 	legacy_intrp = &legacy_intr[adapter->ahw->pci_func];
-
 	adapter->ahw->int_vec_bit = legacy_intrp->int_vec_bit;
-	adapter->tgt_status_reg = qlcnic_get_ioaddr(adapter,
-			legacy_intrp->tgt_status_reg);
-	adapter->tgt_mask_reg = qlcnic_get_ioaddr(adapter,
-			legacy_intrp->tgt_mask_reg);
-	adapter->isr_int_vec = qlcnic_get_ioaddr(adapter, ISR_INT_VECTOR);
-
-	adapter->crb_int_state_reg = qlcnic_get_ioaddr(adapter,
-			ISR_INT_STATE_REG);
+	offset = legacy_intrp->tgt_status_reg;
+	adapter->tgt_status_reg = qlcnic_get_ioaddr(ahw, offset);
+	mask_reg = legacy_intrp->tgt_mask_reg;
+	adapter->tgt_mask_reg = qlcnic_get_ioaddr(ahw, mask_reg);
+	adapter->isr_int_vec = qlcnic_get_ioaddr(ahw, ISR_INT_VECTOR);
+	adapter->crb_int_state_reg = qlcnic_get_ioaddr(ahw, ISR_INT_STATE_REG);
 	dev_info(&pdev->dev, "using legacy interrupts\n");
 	adapter->msix_entries[0].vector = pdev->irq;
 }
@@ -480,20 +479,32 @@ qlcnic_check_vf(struct qlcnic_adapter *adapter)
 		adapter->nic_ops = &qlcnic_ops;
 }
 
-static int
-qlcnic_setup_pci_map(struct qlcnic_adapter *adapter)
+#define QLCNIC_82XX_BAR0_LENGTH 0x00200000UL
+static void qlcnic_get_bar_length(u32 dev_id, ulong *bar)
+{
+	switch (dev_id) {
+	case PCI_DEVICE_ID_QLOGIC_QLE824X:
+		*bar = QLCNIC_82XX_BAR0_LENGTH;
+		break;
+	default:
+		*bar = 0;
+	}
+}
+
+static int qlcnic_setup_pci_map(struct pci_dev *pdev,
+				struct qlcnic_hardware_context *ahw)
 {
+	u32 offset;
 	void __iomem *mem_ptr0 = NULL;
 	resource_size_t mem_base;
-	unsigned long mem_len, pci_len0 = 0;
-
-	struct pci_dev *pdev = adapter->pdev;
+	unsigned long mem_len, pci_len0 = 0, bar0_len;
 
 	/* remap phys address */
 	mem_base = pci_resource_start(pdev, 0);	/* 0 is for BAR 0 */
 	mem_len = pci_resource_len(pdev, 0);
 
-	if (mem_len == QLCNIC_PCI_2MB_SIZE) {
+	qlcnic_get_bar_length(pdev->device, &bar0_len);
+	if (mem_len >= bar0_len) {
 
 		mem_ptr0 = pci_ioremap_bar(pdev, 0);
 		if (mem_ptr0 == NULL) {
@@ -506,15 +517,10 @@ qlcnic_setup_pci_map(struct qlcnic_adapter *adapter)
 	}
 
 	dev_info(&pdev->dev, "%dMB memory map\n", (int)(mem_len>>20));
-
-	adapter->ahw->pci_base0 = mem_ptr0;
-	adapter->ahw->pci_len0 = pci_len0;
-
-	qlcnic_check_vf(adapter);
-
-	adapter->ahw->ocm_win_crb = qlcnic_get_ioaddr(adapter,
-		QLCNIC_PCIX_PS_REG(PCIX_OCM_WINDOW_REG(
-			adapter->ahw->pci_func)));
+	ahw->pci_base0 = mem_ptr0;
+	ahw->pci_len0 = pci_len0;
+	offset = QLCNIC_PCIX_PS_REG(PCIX_OCM_WINDOW_REG(ahw->pci_func));
+	qlcnic_get_ioaddr(ahw, offset);
 
 	return 0;
 }
@@ -1510,9 +1516,10 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	spin_lock_init(&adapter->tx_clean_lock);
 	INIT_LIST_HEAD(&adapter->mac_list);
 
-	err = qlcnic_setup_pci_map(adapter);
+	err = qlcnic_setup_pci_map(pdev, adapter->ahw);
 	if (err)
 		goto err_out_free_hw;
+	qlcnic_check_vf(adapter);
 
 	/* This will be reset for mezz cards  */
 	adapter->portnum = adapter->ahw->pci_func;
-- 
1.7.1

^ permalink raw reply related

* [PATCH 2/7] qlcnic: move HW specific data to seperate structure
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Move HW specific data to a seperate structure as part of
refactoring 82xx adapter driver.

Signed-off-by: Anirban Chakraborty <anirban.chakraborty@qlogic.com>
Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  67 ++++++-------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c    |  29 +++---
 .../net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c    |  70 +++++++-------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c     |   2 +-
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c   |  30 +++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c     |  24 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   | 104 ++++++++++-----------
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c  |  24 ++---
 8 files changed, 177 insertions(+), 173 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index ebc5b06..352a1e4 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -365,11 +365,41 @@ struct qlcnic_hardware_context {
 	u8 pci_func;
 	u8 linkup;
 	u8 loopback_state;
+	u8 beacon_state;
+	u8 has_link_events;
+	u8 fw_type;
+	u8 physical_port;
+	u8 reset_context;
+	u8 msix_supported;
+	u8 max_mac_filters;
+	u8 mc_enabled;
+	u8 max_mc_count;
+	u8 diag_test;
+	u8 num_msix;
+	u8 nic_mode;
+	char diag_cnt;
+
 	u16 port_type;
 	u16 board_type;
 
-	u8 beacon_state;
+	u16 link_speed;
+	u16 link_duplex;
+	u16 link_autoneg;
+	u16 module_type;
+
+	u16 op_mode;
+	u16 switch_mode;
+	u16 max_tx_ques;
+	u16 max_rx_ques;
+	u16 max_mtu;
+	u32 msg_enable;
+	u16 act_pci_func;
 
+	u32 capabilities;
+	u32 temp;
+	u32 int_vec_bit;
+	u32 fw_hal_version;
+	struct qlcnic_hardware_ops *hw_ops;
 	struct qlcnic_nic_intr_coalesce coal;
 	struct qlcnic_fw_dump fw_dump;
 };
@@ -430,6 +460,7 @@ struct qlcnic_host_sds_ring {
 } ____cacheline_internodealigned_in_smp;
 
 struct qlcnic_host_tx_ring {
+	u16 ctx_id;
 	u32 producer;
 	u32 sw_consumer;
 	u32 num_desc;
@@ -894,6 +925,7 @@ struct qlcnic_adapter {
 	unsigned long state;
 	u32 flags;
 
+	int max_drv_tx_rings;
 	u16 num_txd;
 	u16 num_rxd;
 	u16 num_jumbo_rxd;
@@ -902,57 +934,28 @@ struct qlcnic_adapter {
 
 	u8 max_rds_rings;
 	u8 max_sds_rings;
-	u8 msix_supported;
 	u8 portnum;
-	u8 physical_port;
-	u8 reset_context;
 
-	u8 mc_enabled;
-	u8 max_mc_count;
 	u8 fw_wait_cnt;
 	u8 fw_fail_cnt;
 	u8 tx_timeo_cnt;
 	u8 need_fw_reset;
 
-	u8 has_link_events;
-	u8 fw_type;
-	u16 tx_context_id;
 	u16 is_up;
-
-	u16 link_speed;
-	u16 link_duplex;
-	u16 link_autoneg;
-	u16 module_type;
-
-	u16 op_mode;
-	u16 switch_mode;
-	u16 max_tx_ques;
-	u16 max_rx_ques;
-	u16 max_mtu;
 	u16 pvid;
 
-	u32 fw_hal_version;
-	u32 capabilities;
 	u32 irq;
-	u32 temp;
-
-	u32 int_vec_bit;
 	u32 heartbeat;
 
-	u8 max_mac_filters;
 	u8 dev_state;
-	u8 diag_test;
-	char diag_cnt;
 	u8 reset_ack_timeo;
 	u8 dev_init_timeo;
-	u16 msg_enable;
 
 	u8 mac_addr[ETH_ALEN];
 
 	u64 dev_rst_time;
 	u8 mac_learn;
 	unsigned long vlans[BITS_TO_LONGS(VLAN_N_VID)];
-
 	struct qlcnic_npar_info *npars;
 	struct qlcnic_eswitch *eswitch;
 	struct qlcnic_nic_template *nic_ops;
@@ -966,10 +969,8 @@ struct qlcnic_adapter {
 	void __iomem	*isr_int_vec;
 
 	struct msix_entry *msix_entries;
-
 	struct delayed_work fw_work;
 
-
 	struct qlcnic_filter_hash fhash;
 
 	spinlock_t tx_clean_lock;
@@ -1509,7 +1510,7 @@ struct qlcnic_nic_template {
 };
 
 #define QLCDB(adapter, lvl, _fmt, _args...) do {	\
-	if (NETIF_MSG_##lvl & adapter->msg_enable)	\
+	if (NETIF_MSG_##lvl & adapter->ahw->msg_enable)	\
 		printk(KERN_INFO "%s: %s: " _fmt,	\
 			 dev_name(&adapter->pdev->dev),	\
 			__func__, ##_args);		\
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
index bd31104..c5cfbaa 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ctx.c
@@ -35,7 +35,7 @@ qlcnic_issue_cmd(struct qlcnic_adapter *adapter, struct qlcnic_cmd_args *cmd)
 	struct qlcnic_hardware_context *ahw = adapter->ahw;
 
 	signature = QLCNIC_CDRP_SIGNATURE_MAKE(ahw->pci_func,
-		adapter->fw_hal_version);
+					       adapter->ahw->fw_hal_version);
 
 	/* Acquire semaphore before accessing CRB */
 	if (qlcnic_api_lock(adapter)) {
@@ -455,8 +455,7 @@ qlcnic_fw_cmd_create_tx_ctx(struct qlcnic_adapter *adapter)
 		temp = le32_to_cpu(prsp->cds_ring.host_producer_crb);
 		tx_ring->crb_cmd_producer = adapter->ahw->pci_base0 + temp;
 
-		adapter->tx_context_id =
-			le16_to_cpu(prsp->context_id);
+		adapter->tx_ring->ctx_id = le16_to_cpu(prsp->context_id);
 	} else {
 		dev_err(&adapter->pdev->dev,
 			"Failed to create tx ctx in firmware%d\n", err);
@@ -478,7 +477,7 @@ qlcnic_fw_cmd_destroy_tx_ctx(struct qlcnic_adapter *adapter)
 	struct qlcnic_cmd_args cmd;
 
 	memset(&cmd, 0, sizeof(cmd));
-	cmd.req.arg1 = adapter->tx_context_id;
+	cmd.req.arg1 = adapter->tx_ring->ctx_id;
 	cmd.req.arg2 = QLCNIC_DESTROY_CTX_RESET;
 	cmd.req.arg3 = 0;
 	cmd.req.cmd = QLCNIC_CDRP_CMD_DESTROY_TX_CTX;
@@ -750,7 +749,7 @@ int qlcnic_set_nic_info(struct qlcnic_adapter *adapter, struct qlcnic_info *nic)
 	struct qlcnic_info_le *nic_info;
 	size_t nic_size = sizeof(struct qlcnic_info_le);
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return err;
 
 	nic_info_addr = dma_alloc_coherent(&adapter->pdev->dev, nic_size,
@@ -850,8 +849,8 @@ int qlcnic_config_port_mirroring(struct qlcnic_adapter *adapter, u8 id,
 	u32 arg1;
 	struct qlcnic_cmd_args cmd;
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC ||
-		!(adapter->eswitch[id].flags & QLCNIC_SWITCH_ENABLE))
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC ||
+	    !(adapter->eswitch[id].flags & QLCNIC_SWITCH_ENABLE))
 		return err;
 
 	arg1 = id | (enable_mirroring ? BIT_4 : 0);
@@ -890,8 +889,8 @@ int qlcnic_get_port_stats(struct qlcnic_adapter *adapter, const u8 func,
 	if (esw_stats == NULL)
 		return -ENOMEM;
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC &&
-	    func != adapter->ahw->pci_func) {
+	if ((adapter->ahw->op_mode != QLCNIC_MGMT_FUNC) &&
+	    (func != adapter->ahw->pci_func)) {
 		dev_err(&adapter->pdev->dev,
 			"Not privilege to query stats for func=%d", func);
 		return -EIO;
@@ -1002,7 +1001,7 @@ int qlcnic_get_eswitch_stats(struct qlcnic_adapter *adapter, const u8 eswitch,
 
 	if (esw_stats == NULL)
 		return -ENOMEM;
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return -EIO;
 	if (adapter->npars == NULL)
 		return -EIO;
@@ -1053,7 +1052,7 @@ int qlcnic_clear_esw_stats(struct qlcnic_adapter *adapter, const u8 func_esw,
 	u32 arg1;
 	struct qlcnic_cmd_args cmd;
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return -EIO;
 
 	if (func_esw == QLCNIC_STATS_PORT) {
@@ -1126,7 +1125,7 @@ int qlcnic_config_switch_port(struct qlcnic_adapter *adapter,
 	struct qlcnic_cmd_args cmd;
 	u8 pci_func;
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return err;
 	pci_func = esw_cfg->pci_func;
 	arg1 = (adapter->npars[pci_func].phy_port & BIT_0);
@@ -1141,7 +1140,7 @@ int qlcnic_config_switch_port(struct qlcnic_adapter *adapter,
 	case QLCNIC_PORT_DEFAULTS:
 		arg1 |= (BIT_4 | BIT_6 | BIT_7);
 		arg2 |= (BIT_0 | BIT_1);
-		if (adapter->capabilities & QLCNIC_FW_CAPABILITY_TSO)
+		if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO)
 			arg2 |= (BIT_2 | BIT_3);
 		if (!(esw_cfg->discard_tagged))
 			arg1 &= ~BIT_4;
@@ -1194,10 +1193,10 @@ qlcnic_get_eswitch_port_config(struct qlcnic_adapter *adapter,
 {
 	u32 arg1, arg2;
 	u8 phy_port;
-	if (adapter->op_mode == QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC)
 		phy_port = adapter->npars[esw_cfg->pci_func].phy_port;
 	else
-		phy_port = adapter->physical_port;
+		phy_port = adapter->ahw->physical_port;
 	arg1 = phy_port;
 	arg1 |= (esw_cfg->pci_func << 8);
 	if (__qlcnic_get_eswitch_port_config(adapter, &arg1, &arg2))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
index 4a9425b..74b9811 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_ethtool.c
@@ -208,9 +208,9 @@ qlcnic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 				     ADVERTISED_1000baseT_Half |
 				     ADVERTISED_1000baseT_Full);
 
-		ethtool_cmd_speed_set(ecmd, adapter->link_speed);
-		ecmd->duplex = adapter->link_duplex;
-		ecmd->autoneg = adapter->link_autoneg;
+		ethtool_cmd_speed_set(ecmd, adapter->ahw->link_speed);
+		ecmd->duplex = adapter->ahw->link_duplex;
+		ecmd->autoneg = adapter->ahw->link_autoneg;
 
 	} else if (adapter->ahw->port_type == QLCNIC_XGBE) {
 		u32 val;
@@ -224,10 +224,10 @@ qlcnic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 			ecmd->advertising = ADVERTISED_10000baseT_Full;
 		}
 
-		if (netif_running(dev) && adapter->has_link_events) {
-			ethtool_cmd_speed_set(ecmd, adapter->link_speed);
-			ecmd->autoneg = adapter->link_autoneg;
-			ecmd->duplex = adapter->link_duplex;
+		if (netif_running(dev) && adapter->ahw->has_link_events) {
+			ethtool_cmd_speed_set(ecmd, adapter->ahw->link_speed);
+			ecmd->autoneg = adapter->ahw->link_autoneg;
+			ecmd->duplex = adapter->ahw->link_duplex;
 			goto skip;
 		}
 
@@ -238,7 +238,7 @@ qlcnic_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 		return -EIO;
 
 skip:
-	ecmd->phy_address = adapter->physical_port;
+	ecmd->phy_address = adapter->ahw->physical_port;
 	ecmd->transceiver = XCVR_EXTERNAL;
 
 	switch (adapter->ahw->board_type) {
@@ -254,7 +254,7 @@ skip:
 		ecmd->supported |= SUPPORTED_TP;
 		ecmd->advertising |= ADVERTISED_TP;
 		ecmd->port = PORT_TP;
-		ecmd->autoneg =  adapter->link_autoneg;
+		ecmd->autoneg =  adapter->ahw->link_autoneg;
 		break;
 	case QLCNIC_BRDTYPE_P3P_IMEZ:
 	case QLCNIC_BRDTYPE_P3P_XG_LOM:
@@ -270,7 +270,7 @@ skip:
 		ecmd->advertising |= ADVERTISED_TP;
 		ecmd->supported |= SUPPORTED_TP;
 		check_sfp_module = netif_running(dev) &&
-			adapter->has_link_events;
+				   adapter->ahw->has_link_events;
 	case QLCNIC_BRDTYPE_P3P_10G_XFP:
 		ecmd->supported |= SUPPORTED_FIBRE;
 		ecmd->advertising |= ADVERTISED_FIBRE;
@@ -285,7 +285,7 @@ skip:
 				(ADVERTISED_FIBRE | ADVERTISED_TP);
 			ecmd->port = PORT_FIBRE;
 			check_sfp_module = netif_running(dev) &&
-				adapter->has_link_events;
+					   adapter->ahw->has_link_events;
 		} else {
 			ecmd->autoneg = AUTONEG_ENABLE;
 			ecmd->supported |= (SUPPORTED_TP | SUPPORTED_Autoneg);
@@ -301,7 +301,7 @@ skip:
 	}
 
 	if (check_sfp_module) {
-		switch (adapter->module_type) {
+		switch (adapter->ahw->module_type) {
 		case LINKEVENT_MODULE_OPTICAL_UNKNOWN:
 		case LINKEVENT_MODULE_OPTICAL_SRLR:
 		case LINKEVENT_MODULE_OPTICAL_LRM:
@@ -359,9 +359,9 @@ qlcnic_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 	else if (ret)
 		return -EIO;
 
-	adapter->link_speed = ethtool_cmd_speed(ecmd);
-	adapter->link_duplex = ecmd->duplex;
-	adapter->link_autoneg = ecmd->autoneg;
+	adapter->ahw->link_speed = ethtool_cmd_speed(ecmd);
+	adapter->ahw->link_duplex = ecmd->duplex;
+	adapter->ahw->link_autoneg = ecmd->autoneg;
 
 	if (!netif_running(dev))
 		return 0;
@@ -508,14 +508,15 @@ qlcnic_set_ringparam(struct net_device *dev,
 static void qlcnic_get_channels(struct net_device *dev,
 		struct ethtool_channels *channel)
 {
+	int min;
 	struct qlcnic_adapter *adapter = netdev_priv(dev);
 
-	channel->max_rx = rounddown_pow_of_two(min_t(int,
-			adapter->max_rx_ques, num_online_cpus()));
-	channel->max_tx = adapter->max_tx_ques;
+	min = min_t(int, adapter->ahw->max_rx_ques, num_online_cpus());
+	channel->max_rx = rounddown_pow_of_two(min);
+	channel->max_tx = adapter->ahw->max_tx_ques;
 
 	channel->rx_count = adapter->max_sds_rings;
-	channel->tx_count = adapter->max_tx_ques;
+	channel->tx_count = adapter->ahw->max_tx_ques;
 }
 
 static int qlcnic_set_channels(struct net_device *dev,
@@ -543,7 +544,7 @@ qlcnic_get_pauseparam(struct net_device *netdev,
 			  struct ethtool_pauseparam *pause)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	int port = adapter->physical_port;
+	int port = adapter->ahw->physical_port;
 	__u32 val;
 
 	if (adapter->ahw->port_type == QLCNIC_GBE) {
@@ -588,7 +589,7 @@ qlcnic_set_pauseparam(struct net_device *netdev,
 			  struct ethtool_pauseparam *pause)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	int port = adapter->physical_port;
+	int port = adapter->ahw->physical_port;
 	__u32 val;
 
 	/* read mode */
@@ -703,7 +704,7 @@ static int qlcnic_irq_test(struct net_device *netdev)
 	if (ret)
 		goto clear_it;
 
-	adapter->diag_cnt = 0;
+	adapter->ahw->diag_cnt = 0;
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.req.cmd = QLCNIC_CDRP_CMD_INTRPT_TEST;
 	cmd.req.arg1 = adapter->ahw->pci_func;
@@ -715,7 +716,7 @@ static int qlcnic_irq_test(struct net_device *netdev)
 
 	msleep(10);
 
-	ret = !adapter->diag_cnt;
+	ret = !adapter->ahw->diag_cnt;
 
 done:
 	qlcnic_diag_free_res(netdev, max_sds_rings);
@@ -761,7 +762,7 @@ static int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 		qlcnic_create_loopback_buff(skb->data, adapter->mac_addr);
 		skb_put(skb, QLCNIC_ILB_PKT_SIZE);
 
-		adapter->diag_cnt = 0;
+		adapter->ahw->diag_cnt = 0;
 		qlcnic_xmit_frame(skb, adapter->netdev);
 
 		loop = 0;
@@ -770,11 +771,11 @@ static int qlcnic_do_lb_test(struct qlcnic_adapter *adapter, u8 mode)
 			qlcnic_process_rcv_ring_diag(sds_ring);
 			if (loop++ > QLCNIC_ILB_MAX_RCV_LOOP)
 				break;
-		} while (!adapter->diag_cnt);
+		} while (!adapter->ahw->diag_cnt);
 
 		dev_kfree_skb_any(skb);
 
-		if (!adapter->diag_cnt)
+		if (!adapter->ahw->diag_cnt)
 			QLCDB(adapter, DRV,
 			"LB Test: packet #%d was not received\n", i + 1);
 		else
@@ -800,14 +801,15 @@ static int qlcnic_loopback_test(struct net_device *netdev, u8 mode)
 	int loop = 0;
 	int ret;
 
-	if (!(adapter->capabilities & QLCNIC_FW_CAPABILITY_MULTI_LOOPBACK)) {
+	if (!(adapter->ahw->capabilities &
+	      QLCNIC_FW_CAPABILITY_MULTI_LOOPBACK)) {
 		netdev_info(netdev, "Firmware is not loopback test capable\n");
 		return -EOPNOTSUPP;
 	}
 
 	QLCDB(adapter, DRV, "%s loopback test in progress\n",
 		   mode == QLCNIC_ILB_MODE ? "internal" : "external");
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC) {
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) {
 		netdev_warn(netdev, "Loopback test not supported for non "
 				"privilege function\n");
 		return 0;
@@ -826,7 +828,7 @@ static int qlcnic_loopback_test(struct net_device *netdev, u8 mode)
 	if (ret)
 		goto free_res;
 
-	adapter->diag_cnt = 0;
+	adapter->ahw->diag_cnt = 0;
 	do {
 		msleep(500);
 		qlcnic_process_rcv_ring_diag(sds_ring);
@@ -835,8 +837,8 @@ static int qlcnic_loopback_test(struct net_device *netdev, u8 mode)
 				" configure request\n");
 			ret = -QLCNIC_FW_NOT_RESPOND;
 			goto free_res;
-		} else if (adapter->diag_cnt) {
-			ret = adapter->diag_cnt;
+		} else if (adapter->ahw->diag_cnt) {
+			ret = adapter->ahw->diag_cnt;
 			goto free_res;
 		}
 	} while (!QLCNIC_IS_LB_CONFIGURED(adapter->ahw->loopback_state));
@@ -1028,7 +1030,7 @@ static int qlcnic_set_led(struct net_device *dev,
 	int max_sds_rings = adapter->max_sds_rings;
 	int err = -EIO, active = 1;
 
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC) {
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) {
 		netdev_warn(dev, "LED test not supported for non "
 				"privilege function\n");
 		return -EOPNOTSUPP;
@@ -1207,14 +1209,14 @@ static u32 qlcnic_get_msglevel(struct net_device *netdev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
-	return adapter->msg_enable;
+	return adapter->ahw->msg_enable;
 }
 
 static void qlcnic_set_msglevel(struct net_device *netdev, u32 msglvl)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
-	adapter->msg_enable = msglvl;
+	adapter->ahw->msg_enable = msglvl;
 }
 
 static int
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index ff879cd..382c6ac 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -434,7 +434,7 @@ void qlcnic_set_multi(struct net_device *netdev)
 	}
 
 	if ((netdev->flags & IFF_ALLMULTI) ||
-	    (netdev_mc_count(netdev) > adapter->max_mc_count)) {
+	    (netdev_mc_count(netdev) > adapter->ahw->max_mc_count)) {
 		mode = VPORT_MISS_MODE_ACCEPT_MULTI;
 		goto send_fw_cmd;
 	}
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
index d8610ea..de79cde 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_init.c
@@ -246,7 +246,8 @@ int qlcnic_alloc_sw_resources(struct qlcnic_adapter *adapter)
 			rds_ring->dma_size =
 				QLCNIC_P3P_RX_JUMBO_BUF_MAX_LEN;
 
-			if (adapter->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO)
+			if (adapter->ahw->capabilities &
+			    QLCNIC_FW_CAPABILITY_HW_LRO)
 				rds_ring->dma_size += QLCNIC_LRO_BUFFER_EXTRA;
 
 			rds_ring->skb_size =
@@ -655,7 +656,7 @@ qlcnic_setup_idc_param(struct qlcnic_adapter *adapter) {
 			"Not an Ethernet NIC func=%u\n", val);
 		return -EIO;
 	}
-	adapter->physical_port = (val >> 2);
+	adapter->ahw->physical_port = (val >> 2);
 	if (qlcnic_rom_fast_read(adapter, QLCNIC_ROM_DEV_INIT_TIMEOUT, &timeo))
 		timeo = QLCNIC_INIT_TIMEOUT_SECS;
 
@@ -996,7 +997,7 @@ qlcnic_get_bootld_offs(struct qlcnic_adapter *adapter)
 	data_desc = qlcnic_get_data_desc(adapter, QLCNIC_UNI_DIR_SECT_BOOTLD,
 					 QLCNIC_UNI_BOOTLD_IDX_OFF);
 
-	if (adapter->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
+	if (adapter->ahw->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
 		offs = le32_to_cpu(data_desc->findex);
 
 	return (u8 *)&adapter->fw->data[offs];
@@ -1010,7 +1011,7 @@ qlcnic_get_fw_offs(struct qlcnic_adapter *adapter)
 
 	data_desc = qlcnic_get_data_desc(adapter, QLCNIC_UNI_DIR_SECT_FW,
 					 QLCNIC_UNI_FIRMWARE_IDX_OFF);
-	if (adapter->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
+	if (adapter->ahw->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
 		offs = le32_to_cpu(data_desc->findex);
 
 	return (u8 *)&adapter->fw->data[offs];
@@ -1024,7 +1025,7 @@ static u32 qlcnic_get_fw_size(struct qlcnic_adapter *adapter)
 	data_desc = qlcnic_get_data_desc(adapter, QLCNIC_UNI_DIR_SECT_FW,
 					 QLCNIC_UNI_FIRMWARE_IDX_OFF);
 
-	if (adapter->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
+	if (adapter->ahw->fw_type == QLCNIC_UNIFIED_ROMIMAGE)
 		return le32_to_cpu(data_desc->size);
 	else
 		return le32_to_cpu(*(__le32 *)&unirom[QLCNIC_FW_SIZE_OFFSET]);
@@ -1039,7 +1040,7 @@ static u32 qlcnic_get_fw_version(struct qlcnic_adapter *adapter)
 	const u8 *ver_str;
 	int i, ret;
 
-	if (adapter->fw_type != QLCNIC_UNIFIED_ROMIMAGE) {
+	if (adapter->ahw->fw_type != QLCNIC_UNIFIED_ROMIMAGE) {
 		version_offset = *(__le32 *)&fw->data[QLCNIC_FW_VERSION_OFFSET];
 		return le32_to_cpu(version_offset);
 	}
@@ -1070,7 +1071,7 @@ static u32 qlcnic_get_bios_version(struct qlcnic_adapter *adapter)
 	u8 *version_offset;
 	__le32 temp;
 
-	if (adapter->fw_type != QLCNIC_UNIFIED_ROMIMAGE) {
+	if (adapter->ahw->fw_type != QLCNIC_UNIFIED_ROMIMAGE) {
 		version_offset = (u8 *)&fw->data[QLCNIC_BIOS_VERSION_OFFSET];
 		return le32_to_cpu(*(__le32 *)version_offset);
 	}
@@ -1141,7 +1142,7 @@ qlcnic_load_firmware(struct qlcnic_adapter *adapter)
 	struct pci_dev *pdev = adapter->pdev;
 
 	dev_info(&pdev->dev, "loading firmware from %s\n",
-			fw_name[adapter->fw_type]);
+		 fw_name[adapter->ahw->fw_type]);
 
 	if (fw) {
 		u64 data;
@@ -1233,7 +1234,7 @@ qlcnic_validate_firmware(struct qlcnic_adapter *adapter)
 	u32 ver, bios, min_size;
 	struct pci_dev *pdev = adapter->pdev;
 	const struct firmware *fw = adapter->fw;
-	u8 fw_type = adapter->fw_type;
+	u8 fw_type = adapter->ahw->fw_type;
 
 	if (fw_type == QLCNIC_UNIFIED_ROMIMAGE) {
 		if (qlcnic_validate_unified_romimage(adapter))
@@ -1278,7 +1279,7 @@ qlcnic_get_next_fwtype(struct qlcnic_adapter *adapter)
 {
 	u8 fw_type;
 
-	switch (adapter->fw_type) {
+	switch (adapter->ahw->fw_type) {
 	case QLCNIC_UNKNOWN_ROMIMAGE:
 		fw_type = QLCNIC_UNIFIED_ROMIMAGE;
 		break;
@@ -1289,7 +1290,7 @@ qlcnic_get_next_fwtype(struct qlcnic_adapter *adapter)
 		break;
 	}
 
-	adapter->fw_type = fw_type;
+	adapter->ahw->fw_type = fw_type;
 }
 
 
@@ -1299,16 +1300,17 @@ void qlcnic_request_firmware(struct qlcnic_adapter *adapter)
 	struct pci_dev *pdev = adapter->pdev;
 	int rc;
 
-	adapter->fw_type = QLCNIC_UNKNOWN_ROMIMAGE;
+	adapter->ahw->fw_type = QLCNIC_UNKNOWN_ROMIMAGE;
 
 next:
 	qlcnic_get_next_fwtype(adapter);
 
-	if (adapter->fw_type == QLCNIC_FLASH_ROMIMAGE) {
+	if (adapter->ahw->fw_type == QLCNIC_FLASH_ROMIMAGE) {
 		adapter->fw = NULL;
 	} else {
 		rc = request_firmware(&adapter->fw,
-				fw_name[adapter->fw_type], &pdev->dev);
+				      fw_name[adapter->ahw->fw_type],
+				      &pdev->dev);
 		if (rc != 0)
 			goto next;
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
index ba352c1..880a9ca 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c
@@ -709,7 +709,7 @@ static void qlcnic_handle_linkevent(struct qlcnic_adapter *adapter,
 	u8  link_status, module, duplex, autoneg, lb_status = 0;
 	struct net_device *netdev = adapter->netdev;
 
-	adapter->has_link_events = 1;
+	adapter->ahw->has_link_events = 1;
 
 	cable_OUI = msg->body[1] & 0xffffffff;
 	cable_len = (msg->body[1] >> 32) & 0xffff;
@@ -736,18 +736,18 @@ static void qlcnic_handle_linkevent(struct qlcnic_adapter *adapter,
 	qlcnic_advert_link_change(adapter, link_status);
 
 	if (duplex == LINKEVENT_FULL_DUPLEX)
-		adapter->link_duplex = DUPLEX_FULL;
+		adapter->ahw->link_duplex = DUPLEX_FULL;
 	else
-		adapter->link_duplex = DUPLEX_HALF;
+		adapter->ahw->link_duplex = DUPLEX_HALF;
 
-	adapter->module_type = module;
-	adapter->link_autoneg = autoneg;
+	adapter->ahw->module_type = module;
+	adapter->ahw->link_autoneg = autoneg;
 
 	if (link_status) {
-		adapter->link_speed = link_speed;
+		adapter->ahw->link_speed = link_speed;
 	} else {
-		adapter->link_speed = SPEED_UNKNOWN;
-		adapter->link_duplex = DUPLEX_UNKNOWN;
+		adapter->ahw->link_speed = SPEED_UNKNOWN;
+		adapter->ahw->link_duplex = DUPLEX_UNKNOWN;
 	}
 }
 
@@ -785,17 +785,17 @@ static void qlcnic_handle_fw_message(int desc_cnt, int index,
 			break;
 		case 1:
 			dev_info(dev, "loopback already in progress\n");
-			adapter->diag_cnt = -QLCNIC_TEST_IN_PROGRESS;
+			adapter->ahw->diag_cnt = -QLCNIC_TEST_IN_PROGRESS;
 			break;
 		case 2:
 			dev_info(dev, "loopback cable is not connected\n");
-			adapter->diag_cnt = -QLCNIC_LB_CABLE_NOT_CONN;
+			adapter->ahw->diag_cnt = -QLCNIC_LB_CABLE_NOT_CONN;
 			break;
 		default:
 			dev_info(dev,
 				 "loopback configure request failed, err %x\n",
 				 ret);
-			adapter->diag_cnt = -QLCNIC_UNDEFINED_ERROR;
+			adapter->ahw->diag_cnt = -QLCNIC_UNDEFINED_ERROR;
 			break;
 		}
 		break;
@@ -1169,7 +1169,7 @@ static void qlcnic_process_rcv_diag(struct qlcnic_adapter *adapter, int ring,
 		skb_pull(skb, pkt_offset);
 
 	if (!qlcnic_check_loopback_buff(skb->data, adapter->mac_addr))
-		adapter->diag_cnt++;
+		adapter->ahw->diag_cnt++;
 	else
 		dump_skb(skb, adapter);
 
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 5fad56e..7679497 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -261,7 +261,7 @@ static int qlcnic_enable_msix(struct qlcnic_adapter *adapter, u32 num_msix)
 	adapter->flags &= ~(QLCNIC_MSI_ENABLED | QLCNIC_MSIX_ENABLED);
 	qlcnic_set_msix_bit(pdev, 0);
 
-	if (adapter->msix_supported) {
+	if (adapter->ahw->msix_supported) {
  enable_msix:
 		qlcnic_init_msix_entries(adapter, num_msix);
 		err = pci_enable_msix(pdev, adapter->msix_entries, num_msix);
@@ -300,7 +300,7 @@ static void qlcnic_enable_msi_legacy(struct qlcnic_adapter *adapter)
 
 	legacy_intrp = &legacy_intr[adapter->ahw->pci_func];
 
-	adapter->int_vec_bit = legacy_intrp->int_vec_bit;
+	adapter->ahw->int_vec_bit = legacy_intrp->int_vec_bit;
 	adapter->tgt_status_reg = qlcnic_get_ioaddr(adapter,
 			legacy_intrp->tgt_status_reg);
 	adapter->tgt_mask_reg = qlcnic_get_ioaddr(adapter,
@@ -318,7 +318,7 @@ qlcnic_setup_intr(struct qlcnic_adapter *adapter)
 {
 	int num_msix;
 
-	if (adapter->msix_supported) {
+	if (adapter->ahw->msix_supported) {
 		num_msix = rounddown_pow_of_two(min_t(int, num_online_cpus(),
 				QLCNIC_DEF_NUM_STS_DESC_RINGS));
 	} else
@@ -452,8 +452,8 @@ qlcnic_check_vf(struct qlcnic_adapter *adapter)
 	u32 op_mode, priv_level;
 
 	/* Determine FW API version */
-	adapter->fw_hal_version = readl(adapter->ahw->pci_base0 +
-					QLCNIC_FW_API);
+	adapter->ahw->fw_hal_version = readl(adapter->ahw->pci_base0 +
+					     QLCNIC_FW_API);
 
 	/* Find PCI function number */
 	pci_read_config_dword(adapter->pdev, QLCNIC_MSIX_TABLE_OFFSET, &func);
@@ -471,10 +471,10 @@ qlcnic_check_vf(struct qlcnic_adapter *adapter)
 		priv_level = QLC_DEV_GET_DRV(op_mode, adapter->ahw->pci_func);
 
 	if (priv_level == QLCNIC_NON_PRIV_FUNC) {
-		adapter->op_mode = QLCNIC_NON_PRIV_FUNC;
+		adapter->ahw->op_mode = QLCNIC_NON_PRIV_FUNC;
 		dev_info(&adapter->pdev->dev,
 			"HAL Version: %d Non Privileged function\n",
-			adapter->fw_hal_version);
+			 adapter->ahw->fw_hal_version);
 		adapter->nic_ops = &qlcnic_vf_ops;
 	} else
 		adapter->nic_ops = &qlcnic_ops;
@@ -557,7 +557,7 @@ qlcnic_check_options(struct qlcnic_adapter *adapter)
 
 	adapter->fw_version = QLCNIC_VERSION_CODE(fw_major, fw_minor, fw_build);
 
-	if (adapter->op_mode != QLCNIC_NON_PRIV_FUNC) {
+	if (adapter->ahw->op_mode != QLCNIC_NON_PRIV_FUNC) {
 		if (fw_dump->tmpl_hdr == NULL ||
 				adapter->fw_version > prev_fw_version) {
 			if (fw_dump->tmpl_hdr)
@@ -589,7 +589,7 @@ qlcnic_check_options(struct qlcnic_adapter *adapter)
 		adapter->max_rxd = MAX_RCV_DESCRIPTORS_1G;
 	}
 
-	adapter->msix_supported = !!use_msi_x;
+	adapter->ahw->msix_supported = !!use_msi_x;
 
 	adapter->num_txd = MAX_CMD_DESCRIPTORS;
 
@@ -606,15 +606,15 @@ qlcnic_initialize_nic(struct qlcnic_adapter *adapter)
 	if (err)
 		return err;
 
-	adapter->physical_port = (u8)nic_info.phys_port;
-	adapter->switch_mode = nic_info.switch_mode;
-	adapter->max_tx_ques = nic_info.max_tx_ques;
-	adapter->max_rx_ques = nic_info.max_rx_ques;
-	adapter->capabilities = nic_info.capabilities;
-	adapter->max_mac_filters = nic_info.max_mac_filters;
-	adapter->max_mtu = nic_info.max_mtu;
+	adapter->ahw->physical_port = (u8)nic_info.phys_port;
+	adapter->ahw->switch_mode = nic_info.switch_mode;
+	adapter->ahw->max_tx_ques = nic_info.max_tx_ques;
+	adapter->ahw->max_rx_ques = nic_info.max_rx_ques;
+	adapter->ahw->capabilities = nic_info.capabilities;
+	adapter->ahw->max_mac_filters = nic_info.max_mac_filters;
+	adapter->ahw->max_mtu = nic_info.max_mtu;
 
-	if (adapter->capabilities & BIT_6)
+	if (adapter->ahw->capabilities & BIT_6)
 		adapter->flags |= QLCNIC_ESWITCH_ENABLED;
 	else
 		adapter->flags &= ~QLCNIC_ESWITCH_ENABLED;
@@ -700,7 +700,7 @@ qlcnic_set_netdev_features(struct qlcnic_adapter *adapter,
 	vlan_features = (NETIF_F_SG | NETIF_F_IP_CSUM |
 			NETIF_F_IPV6_CSUM | NETIF_F_HW_VLAN_FILTER);
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_TSO) {
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO) {
 		features |= (NETIF_F_TSO | NETIF_F_TSO6);
 		vlan_features |= (NETIF_F_TSO | NETIF_F_TSO6);
 	}
@@ -746,7 +746,7 @@ qlcnic_check_eswitch_mode(struct qlcnic_adapter *adapter)
 
 	if (adapter->flags & QLCNIC_ESWITCH_ENABLED) {
 		if (priv_level == QLCNIC_MGMT_FUNC) {
-			adapter->op_mode = QLCNIC_MGMT_FUNC;
+			adapter->ahw->op_mode = QLCNIC_MGMT_FUNC;
 			err = qlcnic_init_pci_info(adapter);
 			if (err)
 				return err;
@@ -754,12 +754,12 @@ qlcnic_check_eswitch_mode(struct qlcnic_adapter *adapter)
 			qlcnic_set_function_modes(adapter);
 			dev_info(&adapter->pdev->dev,
 				"HAL Version: %d, Management function\n",
-				adapter->fw_hal_version);
+				 adapter->ahw->fw_hal_version);
 		} else if (priv_level == QLCNIC_PRIV_FUNC) {
-			adapter->op_mode = QLCNIC_PRIV_FUNC;
+			adapter->ahw->op_mode = QLCNIC_PRIV_FUNC;
 			dev_info(&adapter->pdev->dev,
 				"HAL Version: %d, Privileged function\n",
-				adapter->fw_hal_version);
+				 adapter->ahw->fw_hal_version);
 		}
 	}
 
@@ -786,7 +786,7 @@ qlcnic_set_default_offload_settings(struct qlcnic_adapter *adapter)
 		esw_cfg.offload_flags = BIT_0;
 		esw_cfg.mac_override = BIT_0;
 		esw_cfg.promisc_mode = BIT_0;
-		if (adapter->capabilities  & QLCNIC_FW_CAPABILITY_TSO)
+		if (adapter->ahw->capabilities  & QLCNIC_FW_CAPABILITY_TSO)
 			esw_cfg.offload_flags |= (BIT_1 | BIT_2);
 		if (qlcnic_config_switch_port(adapter, &esw_cfg))
 			return -EIO;
@@ -867,7 +867,7 @@ static int qlcnic_check_npar_opertional(struct qlcnic_adapter *adapter)
 	u8 npar_opt_timeo = QLCNIC_DEV_NPAR_OPER_TIMEO;
 	u32 npar_state;
 
-	if (adapter->op_mode == QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC)
 		return 0;
 
 	npar_state = QLCRD32(adapter, QLCNIC_CRB_DEV_NPAR_STATE);
@@ -889,7 +889,7 @@ qlcnic_set_mgmt_operations(struct qlcnic_adapter *adapter)
 	int err;
 
 	if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED) ||
-		    adapter->op_mode != QLCNIC_MGMT_FUNC)
+	    adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return 0;
 
 	err = qlcnic_set_default_offload_settings(adapter);
@@ -923,7 +923,7 @@ qlcnic_start_firmware(struct qlcnic_adapter *adapter)
 		if (err)
 			goto err_out;
 
-		adapter->fw_type = QLCNIC_FLASH_ROMIMAGE;
+		adapter->ahw->fw_type = QLCNIC_FLASH_ROMIMAGE;
 	}
 
 	err = qlcnic_need_fw_reset(adapter);
@@ -984,7 +984,7 @@ qlcnic_request_irq(struct qlcnic_adapter *adapter)
 	struct net_device *netdev = adapter->netdev;
 	struct qlcnic_recv_context *recv_ctx = adapter->recv_ctx;
 
-	if (adapter->diag_test == QLCNIC_INTERRUPT_TEST) {
+	if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) {
 		handler = qlcnic_tmp_intr;
 		if (!QLCNIC_IS_MSI_FAMILY(adapter))
 			flags |= IRQF_SHARED;
@@ -1043,7 +1043,7 @@ __qlcnic_up(struct qlcnic_adapter *adapter, struct net_device *netdev)
 	if (qlcnic_set_eswitch_port_config(adapter))
 		return -EIO;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) {
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_MORE_CAPS) {
 		capab2 = QLCRD32(adapter, CRB_FW_CAPABILITIES_2);
 		if (capab2 & QLCNIC_FW_CAPABILITY_2_LRO_MAX_TCP_SEG)
 			adapter->flags |= QLCNIC_FW_LRO_MSS_CAP;
@@ -1074,7 +1074,7 @@ __qlcnic_up(struct qlcnic_adapter *adapter, struct net_device *netdev)
 
 	qlcnic_linkevent_request(adapter, 1);
 
-	adapter->reset_context = 0;
+	adapter->ahw->reset_context = 0;
 	set_bit(__QLCNIC_DEV_UP, &adapter->state);
 	return 0;
 }
@@ -1207,7 +1207,7 @@ void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings)
 	int ring;
 
 	clear_bit(__QLCNIC_DEV_UP, &adapter->state);
-	if (adapter->diag_test == QLCNIC_INTERRUPT_TEST) {
+	if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) {
 		for (ring = 0; ring < adapter->max_sds_rings; ring++) {
 			sds_ring = &adapter->recv_ctx->sds_rings[ring];
 			qlcnic_disable_int(sds_ring);
@@ -1218,7 +1218,7 @@ void qlcnic_diag_free_res(struct net_device *netdev, int max_sds_rings)
 
 	qlcnic_detach(adapter);
 
-	adapter->diag_test = 0;
+	adapter->ahw->diag_test = 0;
 	adapter->max_sds_rings = max_sds_rings;
 
 	if (qlcnic_attach(adapter))
@@ -1288,7 +1288,7 @@ int qlcnic_diag_alloc_res(struct net_device *netdev, int test)
 	qlcnic_detach(adapter);
 
 	adapter->max_sds_rings = 1;
-	adapter->diag_test = test;
+	adapter->ahw->diag_test = test;
 
 	ret = qlcnic_attach(adapter);
 	if (ret) {
@@ -1308,14 +1308,14 @@ int qlcnic_diag_alloc_res(struct net_device *netdev, int test)
 		qlcnic_post_rx_buffers(adapter, rds_ring);
 	}
 
-	if (adapter->diag_test == QLCNIC_INTERRUPT_TEST) {
+	if (adapter->ahw->diag_test == QLCNIC_INTERRUPT_TEST) {
 		for (ring = 0; ring < adapter->max_sds_rings; ring++) {
 			sds_ring = &adapter->recv_ctx->sds_rings[ring];
 			qlcnic_enable_int(sds_ring);
 		}
 	}
 
-	if (adapter->diag_test == QLCNIC_LOOPBACK_TEST) {
+	if (adapter->ahw->diag_test == QLCNIC_LOOPBACK_TEST) {
 		adapter->ahw->loopback_state = 0;
 		qlcnic_linkevent_request(adapter, 1);
 	}
@@ -1386,8 +1386,8 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 	int err;
 	struct pci_dev *pdev = adapter->pdev;
 
-	adapter->mc_enabled = 0;
-	adapter->max_mc_count = 38;
+	adapter->ahw->mc_enabled = 0;
+	adapter->ahw->max_mc_count = 38;
 
 	netdev->netdev_ops	   = &qlcnic_netdev_ops;
 	netdev->watchdog_timeo     = 5*HZ;
@@ -1399,16 +1399,16 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 	netdev->hw_features = NETIF_F_SG | NETIF_F_IP_CSUM |
 		NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_TSO)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_TSO)
 		netdev->hw_features |= NETIF_F_TSO | NETIF_F_TSO6;
 	if (pci_using_dac == 1)
 		netdev->hw_features |= NETIF_F_HIGHDMA;
 
 	netdev->vlan_features = netdev->hw_features;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_FVLANTX)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_FVLANTX)
 		netdev->hw_features |= NETIF_F_HW_VLAN_TX;
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_HW_LRO)
 		netdev->hw_features |= NETIF_F_LRO;
 
 	netdev->features |= netdev->hw_features |
@@ -1549,7 +1549,7 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	qlcnic_clear_stats(adapter);
 
-	err = qlcnic_alloc_msix_entries(adapter, adapter->max_rx_ques);
+	err = qlcnic_alloc_msix_entries(adapter, adapter->ahw->max_rx_ques);
 	if (err)
 		goto err_out_decr_ref;
 
@@ -1846,7 +1846,7 @@ static int qlcnic_check_temp(struct qlcnic_adapter *adapter)
 		       temp_val);
 		rv = 1;
 	} else if (temp_state == QLCNIC_TEMP_WARN) {
-		if (adapter->temp == QLCNIC_TEMP_NORMAL) {
+		if (adapter->ahw->temp == QLCNIC_TEMP_NORMAL) {
 			dev_err(&netdev->dev,
 			       "Device temperature %d degrees C "
 			       "exceeds operating range."
@@ -1854,13 +1854,13 @@ static int qlcnic_check_temp(struct qlcnic_adapter *adapter)
 			       temp_val);
 		}
 	} else {
-		if (adapter->temp == QLCNIC_TEMP_WARN) {
+		if (adapter->ahw->temp == QLCNIC_TEMP_WARN) {
 			dev_info(&netdev->dev,
 			       "Device temperature is now %d degrees C"
 			       " in normal range.\n", temp_val);
 		}
 	}
-	adapter->temp = temp_state;
+	adapter->ahw->temp = temp_state;
 	return rv;
 }
 
@@ -1876,7 +1876,7 @@ static void qlcnic_tx_timeout(struct net_device *netdev)
 	if (++adapter->tx_timeo_cnt >= QLCNIC_MAX_TX_TIMEOUTS)
 		adapter->need_fw_reset = 1;
 	else
-		adapter->reset_context = 1;
+		adapter->ahw->reset_context = 1;
 }
 
 static struct net_device_stats *qlcnic_get_stats(struct net_device *netdev)
@@ -1900,7 +1900,7 @@ static irqreturn_t qlcnic_clear_legacy_intr(struct qlcnic_adapter *adapter)
 
 	status = readl(adapter->isr_int_vec);
 
-	if (!(status & adapter->int_vec_bit))
+	if (!(status & adapter->ahw->int_vec_bit))
 		return IRQ_NONE;
 
 	/* check interrupt state machine, to be sure */
@@ -1932,7 +1932,7 @@ static irqreturn_t qlcnic_tmp_intr(int irq, void *data)
 		return IRQ_NONE;
 
 done:
-	adapter->diag_cnt++;
+	adapter->ahw->diag_cnt++;
 	qlcnic_enable_int(sds_ring);
 	return IRQ_HANDLED;
 }
@@ -2213,7 +2213,7 @@ qlcnic_fwinit_work(struct work_struct *work)
 		return;
 	}
 
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC) {
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) {
 		qlcnic_api_unlock(adapter);
 		goto wait_npar;
 	}
@@ -2329,9 +2329,9 @@ qlcnic_detach_work(struct work_struct *work)
 		goto err_ret;
 	}
 
-	if (adapter->temp == QLCNIC_TEMP_PANIC) {
+	if (adapter->ahw->temp == QLCNIC_TEMP_PANIC) {
 		dev_err(&adapter->pdev->dev, "Detaching the device: temp=%d\n",
-			adapter->temp);
+			adapter->ahw->temp);
 		goto err_ret;
 	}
 
@@ -2456,7 +2456,7 @@ qlcnic_attach_work(struct work_struct *work)
 	struct net_device *netdev = adapter->netdev;
 	u32 npar_state;
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC) {
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC) {
 		npar_state = QLCRD32(adapter, QLCNIC_CRB_DEV_NPAR_STATE);
 		if (adapter->fw_wait_cnt++ > QLCNIC_DEV_NPAR_OPER_TIMEO)
 			qlcnic_clr_all_drv_state(adapter, 0);
@@ -2513,7 +2513,7 @@ qlcnic_check_health(struct qlcnic_adapter *adapter)
 		if (adapter->need_fw_reset)
 			goto detach;
 
-		if (adapter->reset_context && auto_fw_reset) {
+		if (adapter->ahw->reset_context && auto_fw_reset) {
 			qlcnic_reset_hw_context(adapter);
 			adapter->netdev->trans_start = jiffies;
 		}
@@ -2625,7 +2625,7 @@ static int qlcnic_attach_func(struct pci_dev *pdev)
 	if (qlcnic_api_lock(adapter))
 		return -EINVAL;
 
-	if (adapter->op_mode != QLCNIC_NON_PRIV_FUNC && first_func) {
+	if (adapter->ahw->op_mode != QLCNIC_NON_PRIV_FUNC && first_func) {
 		adapter->need_fw_reset = 1;
 		set_bit(__QLCNIC_START_FW, &adapter->state);
 		QLCWR32(adapter, QLCNIC_CRB_DEV_STATE, QLCNIC_DEV_INITIALIZING);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
index 10a702a..252c9cb 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sysfs.c
@@ -33,7 +33,7 @@ static ssize_t qlcnic_store_bridged_mode(struct device *dev,
 	unsigned long new;
 	int ret = -EINVAL;
 
-	if (!(adapter->capabilities & QLCNIC_FW_CAPABILITY_BDG))
+	if (!(adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_BDG))
 		goto err_out;
 
 	if (!test_bit(__QLCNIC_DEV_UP, &adapter->state))
@@ -56,7 +56,7 @@ static ssize_t qlcnic_show_bridged_mode(struct device *dev,
 	struct qlcnic_adapter *adapter = dev_get_drvdata(dev);
 	int bridged_mode = 0;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_BDG)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_BDG)
 		bridged_mode = !!(adapter->flags & QLCNIC_BRIDGE_ENABLED);
 
 	return sprintf(buf, "%d\n", bridged_mode);
@@ -118,7 +118,7 @@ static ssize_t qlcnic_store_beacon(struct device *dev,
 	u8 b_state, b_rate;
 	int err;
 
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC) {
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC) {
 		dev_warn(dev,
 			 "LED test not supported in non privileged mode\n");
 		return -EOPNOTSUPP;
@@ -415,7 +415,7 @@ static int validate_esw_config(struct qlcnic_adapter *adapter,
 		if (pci_func >= QLCNIC_MAX_PCI_FUNC)
 			return QL_STATUS_INVALID_PARAM;
 
-		if (adapter->op_mode == QLCNIC_MGMT_FUNC) {
+		if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC) {
 			if (adapter->npars[pci_func].type != QLCNIC_TYPE_NIC)
 				return QL_STATUS_INVALID_PARAM;
 		}
@@ -473,7 +473,7 @@ static ssize_t qlcnic_sysfs_write_esw_config(struct file *file,
 		return ret;
 
 	for (i = 0; i < count; i++) {
-		if (adapter->op_mode == QLCNIC_MGMT_FUNC) {
+		if (adapter->ahw->op_mode == QLCNIC_MGMT_FUNC) {
 			if (qlcnic_config_switch_port(adapter, &esw_cfg[i]))
 				return QL_STATUS_INVALID_PARAM;
 		}
@@ -500,7 +500,7 @@ static ssize_t qlcnic_sysfs_write_esw_config(struct file *file,
 		}
 	}
 
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		goto out;
 
 	for (i = 0; i < count; i++) {
@@ -882,7 +882,7 @@ void qlcnic_create_sysfs_entries(struct qlcnic_adapter *adapter)
 {
 	struct device *dev = &adapter->pdev->dev;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_BDG)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_BDG)
 		if (device_create_file(dev, &dev_attr_bridged_mode))
 			dev_warn(dev,
 				 "failed to create bridged_mode sysfs entry\n");
@@ -892,7 +892,7 @@ void qlcnic_remove_sysfs_entries(struct qlcnic_adapter *adapter)
 {
 	struct device *dev = &adapter->pdev->dev;
 
-	if (adapter->capabilities & QLCNIC_FW_CAPABILITY_BDG)
+	if (adapter->ahw->capabilities & QLCNIC_FW_CAPABILITY_BDG)
 		device_remove_file(dev, &dev_attr_bridged_mode);
 }
 
@@ -904,7 +904,7 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter)
 	if (device_create_bin_file(dev, &bin_attr_port_stats))
 		dev_info(dev, "failed to create port stats sysfs entry");
 
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC)
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC)
 		return;
 	if (device_create_file(dev, &dev_attr_diag_mode))
 		dev_info(dev, "failed to create diag_mode sysfs entry\n");
@@ -925,7 +925,7 @@ void qlcnic_create_diag_entries(struct qlcnic_adapter *adapter)
 		return;
 	if (device_create_bin_file(dev, &bin_attr_esw_config))
 		dev_info(dev, "failed to create esw config sysfs entry");
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return;
 	if (device_create_bin_file(dev, &bin_attr_npar_config))
 		dev_info(dev, "failed to create npar config sysfs entry");
@@ -942,7 +942,7 @@ void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter)
 
 	device_remove_bin_file(dev, &bin_attr_port_stats);
 
-	if (adapter->op_mode == QLCNIC_NON_PRIV_FUNC)
+	if (adapter->ahw->op_mode == QLCNIC_NON_PRIV_FUNC)
 		return;
 	device_remove_file(dev, &dev_attr_diag_mode);
 	device_remove_bin_file(dev, &bin_attr_crb);
@@ -954,7 +954,7 @@ void qlcnic_remove_diag_entries(struct qlcnic_adapter *adapter)
 	if (!(adapter->flags & QLCNIC_ESWITCH_ENABLED))
 		return;
 	device_remove_bin_file(dev, &bin_attr_esw_config);
-	if (adapter->op_mode != QLCNIC_MGMT_FUNC)
+	if (adapter->ahw->op_mode != QLCNIC_MGMT_FUNC)
 		return;
 	device_remove_bin_file(dev, &bin_attr_npar_config);
 	device_remove_bin_file(dev, &bin_attr_pm_config);
-- 
1.8.0

^ permalink raw reply related

* [PATCH 1/7] qlcnic: add 82xx adapter specific checks
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko
In-Reply-To: <1354628038-2234-1-git-send-email-sony.chacko@qlogic.com>

From: Sony Chacko <sony.chacko@qlogic.com>

Add 82xx adapter ID check before 82xx specific operations as part of
refactoring the driver to enable support for new adapter.

Signed-off-by: Sony Chacko <sony.chacko@qlogic.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h      |    7 ++++++
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c |   23 ++++++++++++++-------
 2 files changed, 22 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 082eecb..ebc5b06 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1515,4 +1515,11 @@ struct qlcnic_nic_template {
 			__func__, ##_args);		\
 	} while (0)
 
+#define PCI_DEVICE_ID_QLOGIC_QLE824X	0x8020
+static inline bool qlcnic_82xx_check(struct qlcnic_adapter *adapter)
+{
+	unsigned short device = adapter->pdev->device;
+	return (device == PCI_DEVICE_ID_QLOGIC_QLE824X) ? true : false;
+}
+
 #endif				/* __QLCNIC_H_ */
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 1eef0bf..5fad56e 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -1561,7 +1561,9 @@ qlcnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	pci_set_drvdata(pdev, adapter);
 
-	qlcnic_schedule_work(adapter, qlcnic_fw_poll_work, FW_POLL_DELAY);
+	if (qlcnic_82xx_check(adapter))
+		qlcnic_schedule_work(adapter, qlcnic_fw_poll_work,
+				     FW_POLL_DELAY);
 
 	switch (adapter->ahw->port_type) {
 	case QLCNIC_GBE:
@@ -1640,7 +1642,8 @@ static void __devexit qlcnic_remove(struct pci_dev *pdev)
 	if (adapter->eswitch != NULL)
 		kfree(adapter->eswitch);
 
-	qlcnic_clr_all_drv_state(adapter, 0);
+	if (qlcnic_82xx_check(adapter))
+		qlcnic_clr_all_drv_state(adapter, 0);
 
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 
@@ -1676,7 +1679,8 @@ static int __qlcnic_shutdown(struct pci_dev *pdev)
 	if (netif_running(netdev))
 		qlcnic_down(adapter, netdev);
 
-	qlcnic_clr_all_drv_state(adapter, 0);
+	if (qlcnic_82xx_check(adapter))
+		qlcnic_clr_all_drv_state(adapter, 0);
 
 	clear_bit(__QLCNIC_RESETTING, &adapter->state);
 
@@ -1684,9 +1688,11 @@ static int __qlcnic_shutdown(struct pci_dev *pdev)
 	if (retval)
 		return retval;
 
-	if (qlcnic_wol_supported(adapter)) {
-		pci_enable_wake(pdev, PCI_D3cold, 1);
-		pci_enable_wake(pdev, PCI_D3hot, 1);
+	if (qlcnic_82xx_check(adapter)) {
+		if (qlcnic_wol_supported(adapter)) {
+			pci_enable_wake(pdev, PCI_D3cold, 1);
+			pci_enable_wake(pdev, PCI_D3hot, 1);
+		}
 	}
 
 	return 0;
@@ -1824,10 +1830,11 @@ static void qlcnic_free_lb_filters_mem(struct qlcnic_adapter *adapter)
 static int qlcnic_check_temp(struct qlcnic_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	u32 temp, temp_state, temp_val;
+	u32 temp_state, temp_val, temp = 0;
 	int rv = 0;
 
-	temp = QLCRD32(adapter, CRB_TEMP_STATE);
+	if (qlcnic_82xx_check(adapter))
+		temp = QLCRD32(adapter, CRB_TEMP_STATE);
 
 	temp_state = qlcnic_get_temp_state(temp);
 	temp_val = qlcnic_get_temp_val(temp);
-- 
1.7.1

^ permalink raw reply related

* [PATCH 0/7] qlcnic: refactor 82xx adapter driver
From: Sony Chacko @ 2012-12-04 13:33 UTC (permalink / raw)
  To: davem; +Cc: netdev, Dept_NX_Linux_NIC_Driver, Sony Chacko

From: Sony Chacko <sony.chacko@qlogic.com>

Please apply the refactoring patches to net-next.

Thanks,
Sony

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox