Netdev List
 help / color / mirror / Atom feed
* [PATCH 9/9 Rev3] [IPoIB] Implement the new batching API
From: Krishna Kumar @ 2007-08-08  9:32 UTC (permalink / raw)
  To: johnpol, kaber, shemminger, davem, sri
  Cc: jagana, Robert.Olsson, peter.p.waskiewicz.jr, herbert, gaagaan,
	kumarkr, rdreier, rick.jones2, mcarlson, jeff, general, mchan,
	tgraf, hadi, netdev, Krishna Kumar, xma
In-Reply-To: <20070808093114.15396.22797.sendpatchset@localhost.localdomain>

IPoIB: implement the new batching API.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 ipoib_main.c |  189 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 184 insertions(+), 5 deletions(-)

diff -ruNp ORG/drivers/infiniband/ulp/ipoib/ipoib_main.c NEW/drivers/infiniband/ulp/ipoib/ipoib_main.c
--- ORG/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-07-12 08:55:06.000000000 +0530
+++ NEW/drivers/infiniband/ulp/ipoib/ipoib_main.c	2007-08-07 13:11:19.000000000 +0530
@@ -558,7 +558,8 @@ static void neigh_add_path(struct sk_buf
 				goto err_drop;
 			}
 		} else
-			ipoib_send(dev, skb, path->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, path->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 	} else {
 		neigh->ah  = NULL;
 
@@ -638,7 +639,7 @@ static void unicast_arp_send(struct sk_b
 		ipoib_dbg(priv, "Send unicast ARP to %04x\n",
 			  be16_to_cpu(path->pathrec.dlid));
 
-		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr));
+		ipoib_send(dev, skb, path->ah, IPOIB_QPN(phdr->hwaddr), 1);
 	} else if ((path->query || !path_rec_start(dev, path)) &&
 		   skb_queue_len(&path->queue) < IPOIB_MAX_PATH_REC_QUEUE) {
 		/* put pseudoheader back on for next time */
@@ -704,7 +705,8 @@ static int ipoib_start_xmit(struct sk_bu
 				goto out;
 			}
 
-			ipoib_send(dev, skb, neigh->ah, IPOIB_QPN(skb->dst->neighbour->ha));
+			ipoib_send(dev, skb, neigh->ah,
+				   IPOIB_QPN(skb->dst->neighbour->ha), 1);
 			goto out;
 		}
 
@@ -753,6 +755,153 @@ out:
 	return NETDEV_TX_OK;
 }
 
+#define	XMIT_QUEUED_SKBS()						\
+	do {								\
+		if (wr_num) {						\
+			ipoib_send(dev, NULL, old_neigh->ah, old_qpn,	\
+				   wr_num);				\
+			wr_num = 0;					\
+		}							\
+	} while (0)
+
+/*
+ * TODO: Merge with ipoib_start_xmit to use the same code and have a
+ * transparent wrapper caller to xmit's, etc. Status: Done, needs testing.
+ */
+static int ipoib_start_xmit_frames(struct net_device *dev)
+{
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+	struct sk_buff *skb;
+	struct sk_buff_head *blist = dev->skb_blist;
+	int max_skbs, wr_num = 0;
+	u32 qpn, old_qpn = 0;
+	struct ipoib_neigh *neigh, *old_neigh = NULL;
+	unsigned long flags;
+
+	if (unlikely(!spin_trylock_irqsave(&priv->tx_lock, flags)))
+		return NETDEV_TX_LOCKED;
+
+	/*
+	 * Figure out how many skbs can be sent. This prevents the device
+	 * getting full and avoids checking for queue stopped after each
+	 * iteration.
+	 */
+	max_skbs = ipoib_sendq_size - (priv->tx_head - priv->tx_tail);
+	while (max_skbs-- > 0 && (skb = __skb_dequeue(blist)) != NULL) {
+		if (likely(skb->dst && skb->dst->neighbour)) {
+			if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
+				XMIT_QUEUED_SKBS();
+				ipoib_path_lookup(skb, dev);
+				continue;
+			}
+
+			neigh = *to_ipoib_neigh(skb->dst->neighbour);
+
+			if (ipoib_cm_get(neigh)) {
+				if (ipoib_cm_up(neigh)) {
+					XMIT_QUEUED_SKBS();
+					ipoib_cm_send(dev, skb,
+						      ipoib_cm_get(neigh));
+					continue;
+				}
+			} else if (neigh->ah) {
+				if (unlikely(memcmp(&neigh->dgid.raw,
+						    skb->dst->neighbour->ha + 4,
+						    sizeof(union ib_gid)))) {
+					spin_lock(&priv->lock);
+					/*
+					 * It's safe to call ipoib_put_ah()
+					 * inside priv->lock here, because we
+					 * know that path->ah will always hold
+					 * one more reference, so ipoib_put_ah()
+					 * will never do more than decrement
+					 * the ref count.
+					 */
+					ipoib_put_ah(neigh->ah);
+					list_del(&neigh->list);
+					ipoib_neigh_free(dev, neigh);
+					spin_unlock(&priv->lock);
+					XMIT_QUEUED_SKBS();
+					ipoib_path_lookup(skb, dev);
+					continue;
+				}
+
+				qpn = IPOIB_QPN(skb->dst->neighbour->ha);
+				if (neigh != old_neigh || qpn != old_qpn) {
+					/*
+					 * Sending to a different destination
+					 * from earlier skb's - send all
+					 * existing skbs (if any), and restart.
+					 */
+					XMIT_QUEUED_SKBS();
+					old_neigh = neigh;
+					old_qpn = qpn;
+				}
+
+				if (likely(!ipoib_process_skb(dev, skb, priv,
+							      neigh->ah, qpn,
+							      wr_num)))
+					wr_num++;
+
+				continue;
+			}
+
+			if (skb_queue_len(&neigh->queue) <
+			    IPOIB_MAX_PATH_REC_QUEUE) {
+				spin_lock(&priv->lock);
+				__skb_queue_tail(&neigh->queue, skb);
+				spin_unlock(&priv->lock);
+			} else {
+				dev_kfree_skb_any(skb);
+				++priv->stats.tx_dropped;
+				++max_skbs;
+			}
+		} else {
+			struct ipoib_pseudoheader *phdr =
+				(struct ipoib_pseudoheader *) skb->data;
+			skb_pull(skb, sizeof *phdr);
+
+			if (phdr->hwaddr[4] == 0xff) {
+				/* Add in the P_Key for multicast*/
+				phdr->hwaddr[8] = (priv->pkey >> 8) & 0xff;
+				phdr->hwaddr[9] = priv->pkey & 0xff;
+
+				XMIT_QUEUED_SKBS();
+				ipoib_mcast_send(dev, phdr->hwaddr + 4, skb);
+			} else {
+				/* unicast GID -- should be ARP or RARP reply */
+
+				if ((be16_to_cpup((__be16 *) skb->data) !=
+				    ETH_P_ARP) &&
+				    (be16_to_cpup((__be16 *) skb->data) !=
+				    ETH_P_RARP)) {
+					ipoib_warn(priv, "Unicast, no %s: type %04x, QPN %06x "
+						IPOIB_GID_FMT "\n",
+						skb->dst ? "neigh" : "dst",
+						be16_to_cpup((__be16 *)
+						skb->data),
+						IPOIB_QPN(phdr->hwaddr),
+						IPOIB_GID_RAW_ARG(phdr->hwaddr
+								  + 4));
+					dev_kfree_skb_any(skb);
+					++priv->stats.tx_dropped;
+					++max_skbs;
+					continue;
+				}
+				XMIT_QUEUED_SKBS();
+				unicast_arp_send(skb, dev, phdr);
+			}
+		}
+	}
+
+	/* Send out last packets (if any) */
+	XMIT_QUEUED_SKBS();
+
+	spin_unlock_irqrestore(&priv->tx_lock, flags);
+
+	return skb_queue_empty(blist) ? NETDEV_TX_OK : NETDEV_TX_BUSY;
+}
+
 static struct net_device_stats *ipoib_get_stats(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
@@ -896,13 +1045,37 @@ int ipoib_dev_init(struct net_device *de
 		goto out_rx_ring_cleanup;
 	}
 
-	/* priv->tx_head & tx_tail are already 0 */
+	/* priv->tx_head & tx_tail & tx_prev_tail are already 0 */
 
-	if (ipoib_ib_dev_init(dev, ca, port))
+	/* Allocate tx_sge */
+	priv->tx_sge = kmalloc(ipoib_sendq_size * sizeof *priv->tx_sge,
+			       GFP_KERNEL);
+	if (!priv->tx_sge) {
+		printk(KERN_WARNING "%s: failed to allocate TX sge (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
 		goto out_tx_ring_cleanup;
+	}
+
+	/* Allocate tx_wr */
+	priv->tx_wr = kmalloc(ipoib_sendq_size * sizeof *priv->tx_wr,
+			      GFP_KERNEL);
+	if (!priv->tx_wr) {
+		printk(KERN_WARNING "%s: failed to allocate TX wr (%d entries)\n",
+		       ca->name, ipoib_sendq_size);
+		goto out_tx_sge_cleanup;
+	}
+
+	if (ipoib_ib_dev_init(dev, ca, port))
+		goto out_tx_wr_cleanup;
 
 	return 0;
 
+out_tx_wr_cleanup:
+	kfree(priv->tx_wr);
+
+out_tx_sge_cleanup:
+	kfree(priv->tx_sge);
+
 out_tx_ring_cleanup:
 	kfree(priv->tx_ring);
 
@@ -930,9 +1103,13 @@ void ipoib_dev_cleanup(struct net_device
 
 	kfree(priv->rx_ring);
 	kfree(priv->tx_ring);
+	kfree(priv->tx_sge);
+	kfree(priv->tx_wr);
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
+	priv->tx_sge = NULL;
+	priv->tx_wr = NULL;
 }
 
 static void ipoib_setup(struct net_device *dev)
@@ -943,6 +1120,7 @@ static void ipoib_setup(struct net_devic
 	dev->stop 		 = ipoib_stop;
 	dev->change_mtu 	 = ipoib_change_mtu;
 	dev->hard_start_xmit 	 = ipoib_start_xmit;
+	dev->hard_start_xmit_batch = ipoib_start_xmit_frames;
 	dev->get_stats 		 = ipoib_get_stats;
 	dev->tx_timeout 	 = ipoib_timeout;
 	dev->hard_header 	 = ipoib_hard_header;
@@ -979,6 +1157,7 @@ static void ipoib_setup(struct net_devic
 
 	spin_lock_init(&priv->lock);
 	spin_lock_init(&priv->tx_lock);
+	spin_lock_init(&priv->comp_lock);
 
 	mutex_init(&priv->mcast_mutex);
 	mutex_init(&priv->vlan_mutex);

^ permalink raw reply

* [ofa-general] [PATCH 5/9 Rev3] [IPoIB] Header file changes
From: Krishna Kumar @ 2007-08-08  9:32 UTC (permalink / raw)
  To: johnpol, kaber, shemminger, davem, sri
  Cc: jagana, Robert.Olsson, herbert, gaagaan, kumarkr, rdreier,
	peter.p.waskiewicz.jr, mcarlson, jeff, general, mchan, tgraf,
	hadi, netdev
In-Reply-To: <20070808093114.15396.22797.sendpatchset@localhost.localdomain>

IPoIB header file changes to use batching.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 ipoib.h |   11 ++++++++---
 1 files changed, 8 insertions(+), 3 deletions(-)

diff -ruNp ORG/drivers/infiniband/ulp/ipoib/ipoib.h NEW/drivers/infiniband/ulp/ipoib/ipoib.h
--- ORG/drivers/infiniband/ulp/ipoib/ipoib.h	2007-07-12 08:55:06.000000000 +0530
+++ NEW/drivers/infiniband/ulp/ipoib/ipoib.h	2007-08-07 13:11:19.000000000 +0530
@@ -266,11 +266,13 @@ struct ipoib_dev_priv {
 	struct ipoib_rx_buf *rx_ring;
 
 	spinlock_t           tx_lock;
+	spinlock_t           comp_lock;		/* to handle parallel WC's */
 	struct ipoib_tx_buf *tx_ring;
 	unsigned             tx_head;
 	unsigned             tx_tail;
-	struct ib_sge        tx_sge;
-	struct ib_send_wr    tx_wr;
+	unsigned             tx_prev_tail;	/* to handle parallel WC's */
+	struct ib_sge        *tx_sge;
+	struct ib_send_wr    *tx_wr;
 
 	struct ib_wc ibwc[IPOIB_NUM_WC];
 
@@ -365,8 +367,11 @@ static inline void ipoib_put_ah(struct i
 int ipoib_open(struct net_device *dev);
 int ipoib_add_pkey_attr(struct net_device *dev);
 
+int ipoib_process_skb(struct net_device *dev, struct sk_buff *skb,
+		      struct ipoib_dev_priv *priv, struct ipoib_ah *address,
+		      u32 qpn, int wr_num);
 void ipoib_send(struct net_device *dev, struct sk_buff *skb,
-		struct ipoib_ah *address, u32 qpn);
+		struct ipoib_ah *address, u32 qpn, int num_skbs);
 void ipoib_reap_ah(struct work_struct *work);
 
 void ipoib_flush_paths(struct net_device *dev);

^ permalink raw reply

* [ofa-general] [PATCH 0/9 Rev3] Implement batching skb API and support in IPoIB
From: Krishna Kumar @ 2007-08-08  9:31 UTC (permalink / raw)
  To: johnpol, sri, shemminger, davem, kaber
  Cc: jagana, Robert.Olsson, peter.p.waskiewicz.jr, herbert, gaagaan,
	kumarkr, rdreier, mcarlson, jeff, general, mchan, tgraf, hadi,
	netdev

This set of patches implements the batching API, and adds support for this
API in IPoIB.

List of changes from original submission:
-----------------------------------------
1.  [Patrick] Suggestion to remove tx_queue_len check for enabling batching.
2.  [Patrick] Move queue purging to dev_deactivate to free references on
	device going down.
3.  [Patrick] Remove changelog & unrelated changes from sch_generic.c
4.  [Patrick] Free skb_blist in unregister_netdev (also suggested to put in
	free_netdev, but it is not required as unregister_netdev will not fail
	at this location).
5.  [Stephen/Patrick] Remove /sysfs support.
6.  [Stephen] Add ethtool support.
7.  [Evgeniy] Stop interrupts while changing tx_batch_skb value.
8.  [Michael Tsirkin] Remove misleading comment in ipoib_send().
9.  [KK] Remove NETIF_F_BATCH_SKBS (device supports batching if API present).
10. [KK] Remove xmit_slots from netdev.
11. [KK] [IPoIB]: Use unsigned instead of int for indices, handle race
	between multiple WC's executing on different CPU's by having a new
	lock (or might need to hold lock for entire duration of WC - some
	optimization is possible here), changed multiple skb algo to not use
	xmit_slots, simplify code, minor performance changes wrt slot
	counters, etc.

List of changes implemented, tested and dropped:
------------------------------------------------
1. [Patrick] Suggestion to use skb_blist statically in netdevice. This
	reduces performance (~ 1%) (possibly due to having an extra check for
	dev->hard_start_xmit_batch API).
2. [Patrick] Suggestion to check if hard_start_xmit_batch can be removed:
	This reduces performance as a call to a non inline function is made,
	and an extra check in driver to see if skb is NULL.
3. [Sridhar] Suggestion to always use batching for regular xmit case too:
	While testing, for some reason the tests virtually hang and
	transfer almost no data for a higher number of processes (like 64
	and above).

Patches are described as:
		 Mail 0/9: This mail
		 Mail 1/9: HOWTO documentation
		 Mail 2/9: Introduce skb_blist and hard_start_xmit_batch API
		 Mail 3/9: Modify qdisc_run() to support batching
		 Mail 4/9: Add ethtool support to enable/disable batching
		 Mail 5/9: IPoIB header file changes to use batching
		 Mail 6/9: IPoIB CM & Multicast changes
		 Mail 7/9: IPoIB verb changes to use batching
		 Mail 8/9: IPoIB internal post and work completion handler
		 Mail 9/9: Implement the new batching API

RESULTS: The performance improvement for TCP No Delay is in the range of -8%
	to 320% (with -8% being the sole negative), with many individual tests
	giving 50% or more improvement (I think it is to do with the hw slots
	getting full quicker resulting in more batching when the queue gets
	woken). The results for TCP are in the range of -11% to 93%, with most
	of the tests (8/12) giving improvements.

ISSUES: I am getting a huge amount of retransmissions for both TCP and TCP No
	Delay cases for IPoIB (which explains the slight degradation for some
	test cases mentioned above). After a full test run, the regular code
	resulted in 74 retransmissions, while there were 1365716 retrans with
	batching code - or 18500 retransmissions for every 1 in regular code.
	But with this huge amount of retransmissions there is 20.7% overall
	improvement in BW (which implies batching will improve results even
	more if this problem is fixed). I suspect this is some issue in the
	driver/firmware since:
		a. I see similar low retransmissions numbers for E1000 (so
		   no bug in core changes).
		b. Even with batching set to maximum 2 skbs, I get almost the
		   same number of retransmissions (implies receiver is
		   probably not dropping skbs). ifconfig/netstat on receiver
		   gives no clue (drop/errors, etc).
	This issue delayed submitting patches for the last 2 weeks, as I was
	trying to debug this; any help from openIB community is appreciated.

Please review and provide feedback; and consider for inclusion.

Thanks,

- KK

---------------------------------------------------------------
Test Case                 ORG         NEW          % Change
---------------------------------------------------------------
                                 TCP
                                 ---
Size:32 Procs:1           2709        4217           55.66
Size:128 Procs:1          10950       15853          44.77
Size:512 Procs:1          35313       68224          93.19
Size:4096 Procs:1         118144      119935         1.51

Size:32 Procs:8           18976       22432          18.21
Size:128 Procs:8          66351       86072          29.72
Size:512 Procs:8          246546      234373         -4.93
Size:4096 Procs:8         268861      251540         -6.44

Size:32 Procs:16          35009       45861          30.99
Size:128 Procs:16         150979      164961         9.26
Size:512 Procs:16         259443      230730         -11.06
Size:4096 Procs:16        265313      246794         -6.98

                               TCP No Delay
                               ------------
Size:32 Procs:1           1930        1944           .72
Size:128 Procs:1          8573        7831           -8.65
Size:512 Procs:1          28536       29347          2.84
Size:4096 Procs:1         98916       104236         5.37

Size:32 Procs:8           4173        17560          320.80
Size:128 Procs:8          17350       66205          281.58
Size:512 Procs:8          69777       211467         203.06
Size:4096 Procs:8         201096      242578         20.62

Size:32 Procs:16          20570       37778          83.65
Size:128 Procs:16         95005       154464         62.58
Size:512 Procs:16         111677      221570         98.40
Size:4096 Procs:16        204765      240368         17.38
---------------------------------------------------------------
Overall:                  2340962     2826340        20.73%
                        [Summary: 19 Better cases, 5 worse]

Testing environment (on client, server uses 4096 sendq size):
	echo "Using 512 size sendq"
	modprobe ib_ipoib send_queue_size=512 recv_queue_size=512
	echo "4096 524288 4194304" > /proc/sys/net/ipv4/tcp_wmem
	echo "4096 1048576 4194304" > /proc/sys/net/ipv4/tcp_rmem
	echo 4194304 > /proc/sys/net/core/rmem_max
	echo 4194304 > /proc/sys/net/core/wmem_max
	echo 120000 > /proc/sys/net/core/netdev_max_backlog

^ permalink raw reply

* [PATCH 2/9 Rev3] [core] Add skb_blist & hard_start_xmit_batch
From: Krishna Kumar @ 2007-08-08  9:31 UTC (permalink / raw)
  To: johnpol, sri, shemminger, kaber, davem
  Cc: jagana, Robert.Olsson, rick.jones2, herbert, gaagaan, kumarkr,
	rdreier, peter.p.waskiewicz.jr, mcarlson, jeff, general, mchan,
	tgraf, hadi, netdev, Krishna Kumar, xma
In-Reply-To: <20070808093114.15396.22797.sendpatchset@localhost.localdomain>

Introduce skb_blist and hard_start_xmit_batch API, handle driver's usage
of the new API, and add support routines.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 include/linux/netdevice.h |    8 +++
 net/core/dev.c            |   98 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 106 insertions(+)

diff -ruNp ORG/include/linux/netdevice.h NEW/include/linux/netdevice.h
--- ORG/include/linux/netdevice.h	2007-08-06 08:25:37.000000000 +0530
+++ NEW/include/linux/netdevice.h	2007-08-07 13:11:19.000000000 +0530
@@ -456,6 +456,9 @@ struct net_device
 	/* Partially transmitted GSO packet. */
 	struct sk_buff		*gso_skb;
 
+	/* List of batch skbs (optional, used if driver supports batching API) */
+	struct sk_buff_head	*skb_blist;
+
 	/* ingress path synchronizer */
 	spinlock_t		ingress_lock;
 	struct Qdisc		*qdisc_ingress;
@@ -472,6 +475,9 @@ struct net_device
 	void			*priv;	/* pointer to private data	*/
 	int			(*hard_start_xmit) (struct sk_buff *skb,
 						    struct net_device *dev);
+	int			(*hard_start_xmit_batch) (struct net_device
+							  *dev);
+
 	/* These may be needed for future network-power-down code. */
 	unsigned long		trans_start;	/* Time (in jiffies) of last Tx	*/
 
@@ -832,6 +838,8 @@ extern int		dev_set_mac_address(struct n
 					    struct sockaddr *);
 extern int		dev_hard_start_xmit(struct sk_buff *skb,
 					    struct net_device *dev);
+extern int		dev_add_skb_to_blist(struct sk_buff *skb,
+					     struct net_device *dev);
 
 extern void		dev_init(void);
 
diff -ruNp ORG/net/core/dev.c NEW/net/core/dev.c
--- ORG/net/core/dev.c	2007-08-06 08:25:40.000000000 +0530
+++ NEW/net/core/dev.c	2007-08-07 13:11:19.000000000 +0530
@@ -897,6 +897,55 @@ void netdev_state_change(struct net_devi
 	}
 }
 
+static void free_batching(struct net_device *dev)
+{
+	if (dev->skb_blist) {
+		if (!skb_queue_empty(dev->skb_blist))
+			skb_queue_purge(dev->skb_blist);
+		kfree(dev->skb_blist);
+		dev->skb_blist = NULL;
+	}
+}
+
+int dev_change_tx_batch_skb(struct net_device *dev, unsigned long new_batch_skb)
+{
+	int ret = 0;
+	struct sk_buff_head *blist;
+
+	if (!dev->hard_start_xmit_batch) {
+		/* Driver doesn't support batching skb API */
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	/* Handle invalid argument */
+	if (new_batch_skb < 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Check if new value is same as the current */
+	if (!!dev->skb_blist == !!new_batch_skb)
+		goto out;
+
+	if (new_batch_skb &&
+	    (blist = kmalloc(sizeof *blist, GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	spin_lock(&dev->queue_lock);
+	if (new_batch_skb) {
+		skb_queue_head_init(blist);
+		dev->skb_blist = blist;
+	} else
+		free_batching(dev);
+	spin_unlock(&dev->queue_lock);
+
+out:
+	return ret;
+}
+
 /**
  *	dev_load 	- load a network module
  *	@name: name of interface
@@ -1459,6 +1508,45 @@ static int dev_gso_segment(struct sk_buf
 	return 0;
 }
 
+/*
+ * Add skb (skbs in case segmentation is required) to dev->skb_blist. We are
+ * holding QDISC RUNNING bit, so no one else can add to this list. Also, skbs
+ * are dequeued from this list when we call the driver, so the list is safe
+ * from simultaneous deletes too.
+ *
+ * Returns count of successful skb(s) added to skb_blist.
+ */
+int dev_add_skb_to_blist(struct sk_buff *skb, struct net_device *dev)
+{
+	if (!list_empty(&ptype_all))
+		dev_queue_xmit_nit(skb, dev);
+
+	if (netif_needs_gso(dev, skb)) {
+		if (unlikely(dev_gso_segment(skb))) {
+			kfree(skb);
+			return 0;
+		}
+
+		if (skb->next) {
+			int count = 0;
+
+			do {
+				struct sk_buff *nskb = skb->next;
+
+				skb->next = nskb->next;
+				__skb_queue_tail(dev->skb_blist, nskb);
+				count++;
+			} while (skb->next);
+
+			skb->destructor = DEV_GSO_CB(skb)->destructor;
+			kfree_skb(skb);
+			return count;
+		}
+	}
+	__skb_queue_tail(dev->skb_blist, skb);
+	return 1;
+}
+
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	if (likely(!skb->next)) {
@@ -3446,6 +3535,13 @@ int register_netdevice(struct net_device
 		}
 	}
 
+	if (dev->hard_start_xmit_batch) {
+		/* Driver supports batching skb API */
+		dev->skb_blist = kmalloc(sizeof *dev->skb_blist, GFP_KERNEL);
+		if (dev->skb_blist)
+			skb_queue_head_init(dev->skb_blist);
+	}
+
 	/*
 	 *	nil rebuild_header routine,
 	 *	that should be never called and used as just bug trap.
@@ -3787,6 +3882,9 @@ void unregister_netdevice(struct net_dev
 
 	synchronize_net();
 
+	/* Deallocate batching structure */
+	free_batching(dev);
+
 	/* Shutdown queueing discipline. */
 	dev_shutdown(dev);
 

^ permalink raw reply

* Re: 2.6.20->2.6.21 - networking dies after random time
From: Jarek Poplawski @ 2007-08-08  9:30 UTC (permalink / raw)
  To: Jean-Baptiste Vignaud
  Cc: cebbert, mingo, marcin.slusarz, tglx, torvalds, linux-kernel,
	shemminger, linux-net, netdev, akpm, alan
In-Reply-To: <JMG6AY$2339D4FEA804188F4C6D4F66006A6BB6@xandmail.com>

On Wed, Aug 08, 2007 at 10:59:22AM +0200, Jean-Baptiste Vignaud wrote:
> > Jean-Baptiste: I'm not sure how much of this testing you can afford?
> > If you can spare some time for this and your box isn't for
> > 'production' it could be very precious to diagnose such reproducible
> > bug.
> 
> Well i can continue testing patches for sure.

Great!

> 
> > Then, I'd have a few suggestions (you could choose any of them) like:
> > - trying these last test patches prepared for Marcin, too (but only
> > with kernels 2.6.21 - 2.6.23-rc1),
> 
> I'v patched 2.6.23-rc2 with those patches yesterday evening, and
> launched samba copy.
> Is rc2 ok ?

Yes! Mostly... 2.6.23-rc2 has a "temporary" patch applied, which should
work by itself (at least it works for Marcin). So, it's very good news
it works for you too. But, as a matter of fact the other patches
(I hope you mean these yesterday's two) probably are not used very
much (the last one could do some work but with other irqs).

So, it would be interesting to try them with e.g. 2.6.23-rc1. But not
together (I'd remind you that after applying such a patch, make oldconfig,
make and so on plus testing, you can revert it with the same command
you used to patch plus -R option (e.g.: patch -p1 -R < ../patch1.diff),
to save some time on restoring a 'vanilla' kernel version).
The aim of these newer patches is to find why exactly this patch in
-rc2 works...

Cheers,
Jarek P.

^ permalink raw reply

* [PATCH 1/9 Rev3] [Doc] HOWTO Documentation for batching
From: Krishna Kumar @ 2007-08-08  9:31 UTC (permalink / raw)
  To: johnpol, kaber, shemminger, davem, sri
  Cc: jagana, Robert.Olsson, peter.p.waskiewicz.jr, herbert, gaagaan,
	kumarkr, rdreier, rick.jones2, mcarlson, jeff, general, mchan,
	tgraf, hadi, netdev, Krishna Kumar, xma
In-Reply-To: <20070808093114.15396.22797.sendpatchset@localhost.localdomain>

Add Documentation describing batching API.

Signed-off-by: Krishna Kumar <krkumar2@in.ibm.com>
---
 Batching_skb_API.txt |   82 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 82 insertions(+)

diff -ruNp ORG/Documentation/networking/Batching_skb_API.txt NEW/Documentation/networking/Batching_skb_API.txt
--- ORG/Documentation/networking/Batching_skb_API.txt	1970-01-01 05:30:00.000000000 +0530
+++ NEW/Documentation/networking/Batching_skb_API.txt	2007-08-07 22:41:55.000000000 +0530
@@ -0,0 +1,82 @@
+		 HOWTO for batching skb API support
+		 -----------------------------------
+
+Section 1: What is batching skb API ?
+Section 2: How batching API works vs the original API ?
+Section 3: How drivers can support this API ?
+Section 4: How users can work with this API ?
+
+
+Introduction: Kernel support for batching skb
+----------------------------------------------
+
+A new xmit API - hard_start_xmit_batch() is provided in the netdevice layer
+similar to the existing hard_start_xmit() API. Drivers which export this
+API can implement it similar to the hard_start_xmit handler. The new API
+should process multiple skbs (or even one) in a single call while the
+existing hard_start_xmit processes one skb. It is possible for the driver
+writer to re-use most of the code from the existing API in the new API
+without having code duplication.
+
+
+Section 1: What is batching skb API ?
+-------------------------------------
+
+	This API is optionally exported by a driver. The pre-requisite for a
+	driver to use this API is that it should have a reasonably sized
+	hardware queue that can process multiple skbs.
+
+
+Section 2: How batching API works vs the original API ?
+-------------------------------------------------------
+
+	The networking stack gets called from upper layer protocols with a
+	single skb to transmit. This skb is first enqueue'd and an attempt is
+	made to transmit it immediately (via qdisc_run). However, events like
+	tx lock contention, tx queue stopped, etc, can result in the skb not
+	getting sent out and it remains in the queue. When the next xmit is
+	called or when the queue is re-enabled, qdisc_run could potentially
+	find multiple packets in the queue, and iteratively send them all out
+	one-by-one.
+
+	The batching skb API was added to exploit this situation where all
+	skbs can be passed in one shot to the device. This reduces driver
+	processing, locking at the driver (or in stack for ~LLTX drivers)
+	gets amortized over multiple skbs, and in case of specific drivers
+	where every xmit results in a completion processing (like IPoIB),
+	optimizations can be made in the driver to request a completion for
+	only the last skb that was sent which results in saving interrupts
+	for every (but the last) skb that was sent in the same batch.
+
+	Batching can result in significant performance gains for systems that
+	have multiple data stream paths over the same network interface card.
+
+
+Section 3: How drivers can support this API ?
+---------------------------------------------
+
+	The new API - dev->hard_start_xmit_batch(struct net_device *dev),
+	simplistically, can be written almost identically to the regular
+	xmit API except that multiple skbs should be processed by the driver
+	instead of one skb. The new API doesn't get a skb as an argument,
+	instead it picks up all the skbs from dev->skb_blist, where it was
+	added by the core stack, and tries to send them out.
+
+	Batching requires the driver to set dev->hard_start_xmit_batch to the
+	new API implemented for that driver.
+
+
+Section 4: How users can work with this API ?
+---------------------------------------------
+
+	Batching could be disabled for a particular device, e.g. on desktop
+	systems if only one stream of network activity for that device is
+	taking place, since performance could be slightly affected due to
+	extra processing that batching adds (unless packets are getting
+	sent fast resulting in stopped queues). Batching can be enabled if
+	more than one stream of network activity per device is being done,
+	e.g. on servers; or even desktop usage with multiple browser, chat,
+	file transfer sessions, etc.
+
+	Per device batching can be enabled/disabled using ethtool, where
+	passing 1 enables batching and passing 0 disables batching.

^ permalink raw reply

* Re: [RFC] allow device to stop packet mirror behaviour
From: Johannes Berg @ 2007-08-08  9:14 UTC (permalink / raw)
  To: David Miller
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, andy-/Zus8d0mwwtBDgjK7y7TUQ,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20070807.180607.116354128.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 748 bytes --]

On Tue, 2007-08-07 at 18:06 -0700, David Miller wrote:
> From: Johannes Berg <johannes-cdvu00un1VgdHxzADdlk8Q@public.gmane.org>
> Date: Tue, 07 Aug 2007 10:25:55 +0200
> 
> > The only way to solve this problem therefore seems to be to suppress the
> > mirroring out of the packet by dev_queue_xmit_nit(). The patch below
> > does that by way of adding a new netdev flag.
> 
> Multicast packets also get looped back in a similar manner in the ipv4
> code.  These will also be seen twice due to this issue.

I don't think these other places are of any interest because of the
radiotap+802.11 framing on the devices where it is relevant to us; you
can't actually add an IP route to a monitor interface as far as I can
tell.

johannes

[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 190 bytes --]

^ permalink raw reply

* Re: 2.6.20->2.6.21 - networking dies after random time
From: Jean-Baptiste Vignaud @ 2007-08-08  8:59 UTC (permalink / raw)
  To: jarkao2
  Cc: cebbert, mingo, marcin.slusarz, tglx, torvalds, linux-kernel,
	shemminger, linux-net, netdev, akpm, alan

> Jean-Baptiste: I'm not sure how much of this testing you can afford?
> If you can spare some time for this and your box isn't for
> 'production' it could be very precious to diagnose such reproducible
> bug.

Well i can continue testing patches for sure.

> Then, I'd have a few suggestions (you could choose any of them) like:
> - trying these last test patches prepared for Marcin, too (but only
> with kernels 2.6.21 - 2.6.23-rc1),

I'v patched 2.6.23-rc2 with those patches yesterday evening, and
launched samba copy. 
Is rc2 ok ?

This morning the network is still up :
RX bytes:279853499958 (260.6 GiB)  TX bytes:7416695531 (6.9 GiB)

Still testing.

> If you would like to read something more about testing (then of
> course my suggestions could occur invalid - I'm a very bad tester
> myself...) you can try this:
> http://www.stardust.webpages.pl/files/handbook/

I'll have a look at the document. 

Jb



^ permalink raw reply

* Re: [RFC][BNX2X]: New driver for Broadcom 10Gb Ethernet.
From: Michael Buesch @ 2007-08-08  8:40 UTC (permalink / raw)
  To: David Miller; +Cc: hch, jeff, mchan, netdev, eliezert, lusinsky, eilong
In-Reply-To: <20070807.160808.38709450.davem@davemloft.net>

On Wednesday 08 August 2007 01:08:08 David Miller wrote:
> From: Christoph Hellwig <hch@infradead.org>
> Date: Wed, 8 Aug 2007 00:04:59 +0100
> 
> > Please take a look at kernel/irq/handle.c.  The irq handler is
> > always called with the right dev_id argument.  Everything would be a complete
> > nightmare to handle because you usually need to access the device private
> > data to check whether the shared irq is for this device.
> 
> Absolutely.
> 
> I can't believe we're even discussing something so obvious and
> wasting everyone's time.
> 
> 

Ok, then something "so obvious" is explained wrong in all of
the tutorials I read.
So it was _not_ that "obvious" to me. ok?

-- 
Greetings Michael.

^ permalink raw reply

* [PATCH] drivers/net: remove superfluous memset
From: Mariusz Kozlowski @ 2007-08-08  7:38 UTC (permalink / raw)
  To: Jeff Garzik; +Cc: netdev, linux-kernel

Hello,

	This patch covers something like this:

dev = alloc_*dev(...
...
priv = netdev_priv(dev);
memset(priv, 0, sizeof(*priv));

The memset() here is superfluous. alloc_netdev() uses kzalloc()
to allocate needed memory so there is no need to zero the priv region
twice.

Signed-off-by: Mariusz Kozlowski <m.kozlowski@tuxland.pl>

 drivers/net/fs_enet/fs_enet-main.c   |    1 -
 drivers/net/myri10ge/myri10ge.c      |    1 -
 drivers/net/netxen/netxen_nic_main.c |    1 -
 drivers/net/sunlance.c               |    1 -
 drivers/net/usb/pegasus.c            |    1 -
 drivers/net/usb/rtl8150.c            |    1 -
 6 files changed, 6 deletions(-)

diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/fs_enet/fs_enet-main.c linux-2.6.23-rc1-mm2-b/drivers/net/fs_enet/fs_enet-main.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/fs_enet/fs_enet-main.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/fs_enet/fs_enet-main.c	2007-08-07 01:26:12.000000000 +0200
@@ -962,7 +962,6 @@ static struct net_device *fs_init_instan
 	SET_MODULE_OWNER(ndev);
 
 	fep = netdev_priv(ndev);
-	memset(fep, 0, privsize);	/* clear everything */
 
 	fep->dev = dev;
 	dev_set_drvdata(dev, ndev);
diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/myri10ge/myri10ge.c linux-2.6.23-rc1-mm2-b/drivers/net/myri10ge/myri10ge.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/myri10ge/myri10ge.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/myri10ge/myri10ge.c	2007-08-07 01:24:49.000000000 +0200
@@ -2852,7 +2852,6 @@ static int myri10ge_probe(struct pci_dev
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
 	mgp = netdev_priv(netdev);
-	memset(mgp, 0, sizeof(*mgp));
 	mgp->dev = netdev;
 	mgp->pdev = pdev;
 	mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/netxen/netxen_nic_main.c linux-2.6.23-rc1-mm2-b/drivers/net/netxen/netxen_nic_main.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/netxen/netxen_nic_main.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/netxen/netxen_nic_main.c	2007-08-07 01:06:57.000000000 +0200
@@ -329,7 +329,6 @@ netxen_nic_probe(struct pci_dev *pdev, c
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 
 	adapter = netdev->priv;
-	memset(adapter, 0 , sizeof(struct netxen_adapter));
 
 	adapter->ahw.pdev = pdev;
 	adapter->ahw.pci_func  = pci_func_id;
diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/sunlance.c linux-2.6.23-rc1-mm2-b/drivers/net/sunlance.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/sunlance.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/sunlance.c	2007-08-07 01:20:37.000000000 +0200
@@ -1335,7 +1335,6 @@ static int __devinit sparc_lance_probe_o
 		return -ENOMEM;
 
 	lp = netdev_priv(dev);
-	memset(lp, 0, sizeof(*lp));
 
 	if (sparc_lance_debug && version_printed++ == 0)
 		printk (KERN_INFO "%s", version);
diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/usb/pegasus.c linux-2.6.23-rc1-mm2-b/drivers/net/usb/pegasus.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/usb/pegasus.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/usb/pegasus.c	2007-08-07 01:16:44.000000000 +0200
@@ -1306,7 +1306,6 @@ static int pegasus_probe(struct usb_inte
 	}
 
 	pegasus = netdev_priv(net);
-	memset(pegasus, 0, sizeof (struct pegasus));
 	pegasus->dev_index = dev_index;
 	init_waitqueue_head(&pegasus->ctrl_wait);
 
diff -upr linux-2.6.23-rc1-mm2-a/drivers/net/usb/rtl8150.c linux-2.6.23-rc1-mm2-b/drivers/net/usb/rtl8150.c
--- linux-2.6.23-rc1-mm2-a/drivers/net/usb/rtl8150.c	2007-08-01 08:43:46.000000000 +0200
+++ linux-2.6.23-rc1-mm2-b/drivers/net/usb/rtl8150.c	2007-08-07 01:15:51.000000000 +0200
@@ -905,7 +905,6 @@ static int rtl8150_probe(struct usb_inte
 	}
 
 	dev = netdev_priv(netdev);
-	memset(dev, 0, sizeof(rtl8150_t));
 
 	dev->intr_buff = kmalloc(INTBUFSIZE, GFP_KERNEL);
 	if (!dev->intr_buff) {

^ permalink raw reply

* Re: 2.6.20->2.6.21 - networking dies after random time
From: Jarek Poplawski @ 2007-08-08  7:36 UTC (permalink / raw)
  To: Jean-Baptiste Vignaud
  Cc: cebbert, mingo, marcin.slusarz, tglx, torvalds, linux-kernel,
	shemminger, linux-net, netdev, akpm, alan
In-Reply-To: <20070808072114.GA1731@ff.dom.local>

On Wed, Aug 08, 2007 at 09:21:14AM +0200, Jarek Poplawski wrote:
> On Tue, Aug 07, 2007 at 07:16:33PM +0200, Jean-Baptiste Vignaud wrote:
...
> Marcin has done this with successfully using the most professional
> way: git bisect (which btw. I did learn yet), but, IMHO, it could be
...
Let me say this slowly and distinctly: I didn't learn yet! (Shame on me!)
Sorry for these misspellings here and there...

Jarek P.

^ permalink raw reply

* Re: 2.6.20->2.6.21 - networking dies after random time
From: Jarek Poplawski @ 2007-08-08  7:21 UTC (permalink / raw)
  To: Jean-Baptiste Vignaud
  Cc: cebbert, mingo, marcin.slusarz, tglx, torvalds, linux-kernel,
	shemminger, linux-net, netdev, akpm, alan
In-Reply-To: <JMEYNL$38C71F6F1D25A7CE82B12729A59CF244@xandmail.com>

On Tue, Aug 07, 2007 at 07:16:33PM +0200, Jean-Baptiste Vignaud wrote:
...
> So this afternoon i compiled 2.6.23-rc2 with same options as 2.6.23-rc1
> and edited grub.conf to add nosmp but after reboot the box did not
> respond. Back home, i saw that the kernel failed because it was unable
> to find the partitions (mdadm failed, then LVM). After a few tests,
> removing nosmp let the kernel boot correctly. It seems that even the
> fedora provided kernels have the same behavior
> (well at least 2.6.22.1-41.fc7).

Sorry: it seems there is some implementation error or some modules
don't check CONFIG_SMP enough...

Of course testing this with smp should be precious too.
Only, after finding some problems, you should consider smp is quite
a new and complicated technology, at least regarding such old designs
as 3c905.

BTW: I didn't notice this yesterday, but your forcedeth uses new type
of irq handling (MSI), so it should explain why it's not affected.

Jean-Baptiste: I'm not sure how much of this testing you can afford?
If you can spare some time for this and your box isn't for
'production' it could be very precious to diagnose such reproducible
bug.

Then, I'd have a few suggestions (you could choose any of them) like:
- trying these last test patches prepared for Marcin, too (but only
with kernels 2.6.21 - 2.6.23-rc1),
- trying to find the last kernel version, which works for you:
Marcin has done this with successfully using the most professional
way: git bisect (which btw. I did learn yet), but, IMHO, it could be
very useful to try a "poor man's" bisect too, with older kernels like this:
2.6.18, so to try again this version of previous Fedora, but
preferably in "vanilla" version (there could be some problems if
something in your configs or hardware has changed); then if OK:
2.6.20; if OK 2.6.21-rc1 or -rc2 (there are usually heavy changes
in the beginning of a cycle); then try to jump forward or backward
around the middle of the range eg. -rc4. You should use each time the
same, current config and remember to 'make oldconfig' before make.

In my opinion it would be very precious even after some long time,
so there is no need to hurry and do this now. The most important:
if nothing has changed with your hardware in the meantime, you
should find 'the culprit' for sure.

But, if there are any problems about such testing, don't bother!
It could be really a lot of hard and maybe boring work.

If you would like to read something more about testing (then of
course my suggestions could occur invalid - I'm a very bad tester
myself...) you can try this:
http://www.stardust.webpages.pl/files/handbook/

If you would need some additional advice you can mail me privately
too (but my response could take a few days). Of course, if you find
something interesting I'd be glad to know about this, but let's be
honest - I'm not any authority about these drivers, so cc-ing a
maintainer should always be more usable.

Thanks,
Jarek P.

PS: it would be nice if you could fix your mail program on line
breaking (or try to do this manually).


^ permalink raw reply

* [PATCH 3/3] 8139too: clean up I/O remapping
From: Jeff Garzik @ 2007-08-08  6:45 UTC (permalink / raw)
  To: netdev; +Cc: LKML
In-Reply-To: <20070808064440.GA16595@havoc.gtf.org>


commit 9ee6b32a47b9abc565466a9c3b127a5246b452e5
Author: Jeff Garzik <jeff@garzik.org>
Date:   Wed Aug 8 02:42:47 2007 -0400

    [netdrvr] 8139too: clean up I/O remapping
    
    * no need to explicitly use ioport_map() for PIO.  pci_iomap()
      works just fine for both PIO and MMIO.  This permits deletion
      of redundant code.
    
    * add 'use_pio' module parameter to enable PIO mode without
      rebuilding the kernel.
    
    * change a few other module options' sysfs perms to enable visibility
    
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

 drivers/net/8139too.c |   50 +++++++++++++++++++++-----------------------------
 1 file changed, 21 insertions(+), 29 deletions(-)

9ee6b32a47b9abc565466a9c3b127a5246b452e5
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 87ecaf8..761f716 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -121,8 +121,15 @@
 
 
 /* enable PIO instead of MMIO, if CONFIG_8139TOO_PIO is selected */
+enum rtl_bar_map_info {
+	rtl_pio_bar	= 0,	/* PCI BAR #0: PIO */
+	rtl_mmio_bar	= 1,	/* PCI BAR #1: MMIO */
+};
+
 #ifdef CONFIG_8139TOO_PIO
-#define USE_IO_OPS 1
+static int use_pio	= 1;
+#else
+static int use_pio;
 #endif
 
 /* define to 1, 2 or 3 to enable copious debugging info */
@@ -613,14 +620,17 @@ MODULE_DESCRIPTION ("RealTek RTL-8139 Fast Ethernet driver");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(DRV_VERSION);
 
-module_param(multicast_filter_limit, int, 0);
+module_param(multicast_filter_limit, int, 0444);
 module_param_array(media, int, NULL, 0);
 module_param_array(full_duplex, int, NULL, 0);
-module_param(debug, int, 0);
-MODULE_PARM_DESC (debug, "8139too bitmapped message enable number");
+module_param(debug, int, 0444);
+module_param(use_pio, int, 0444);
+
 MODULE_PARM_DESC (multicast_filter_limit, "8139too maximum number of filtered multicast addresses");
 MODULE_PARM_DESC (media, "8139too: Bits 4+9: force full duplex, bit 5: 100Mbps");
 MODULE_PARM_DESC (full_duplex, "8139too: Force full duplex for board(s) (1)");
+MODULE_PARM_DESC (debug, "8139too bitmapped message enable number");
+MODULE_PARM_DESC (use_pio, "Non-zero to enable PIO (rather than MMIO) register mapping");
 
 static int read_eeprom (void __iomem *ioaddr, int location, int addr_len);
 static int rtl8139_open (struct net_device *dev);
@@ -708,13 +718,7 @@ static void __rtl8139_cleanup_dev (struct net_device *dev)
 	assert (tp->pci_dev != NULL);
 	pdev = tp->pci_dev;
 
-#ifdef USE_IO_OPS
-	if (tp->mmio_addr)
-		ioport_unmap (tp->mmio_addr);
-#else
-	if (tp->mmio_addr)
-		pci_iounmap (pdev, tp->mmio_addr);
-#endif /* USE_IO_OPS */
+	pci_iounmap (pdev, tp->mmio_addr);
 
 	/* it's ok to call this even if we have no regions to free */
 	pci_release_regions (pdev);
@@ -790,32 +794,32 @@ static int __devinit rtl8139_init_board (struct pci_dev *pdev,
 	DPRINTK("PIO region size == 0x%02X\n", pio_len);
 	DPRINTK("MMIO region size == 0x%02lX\n", mmio_len);
 
-#ifdef USE_IO_OPS
 	/* make sure PCI base addr 0 is PIO */
 	if (!(pio_flags & IORESOURCE_IO)) {
 		dev_err(&pdev->dev, "region #0 not a PIO resource, aborting\n");
 		rc = -ENODEV;
 		goto err_out;
 	}
+
 	/* check for weird/broken PCI region reporting */
 	if (pio_len < RTL_MIN_IO_SIZE) {
 		dev_err(&pdev->dev, "Invalid PCI I/O region size(s), aborting\n");
 		rc = -ENODEV;
 		goto err_out;
 	}
-#else
+
 	/* make sure PCI base addr 1 is MMIO */
 	if (!(mmio_flags & IORESOURCE_MEM)) {
 		dev_err(&pdev->dev, "region #1 not an MMIO resource, aborting\n");
 		rc = -ENODEV;
 		goto err_out;
 	}
+
 	if (mmio_len < RTL_MIN_IO_SIZE) {
 		dev_err(&pdev->dev, "Invalid PCI mem region size(s), aborting\n");
 		rc = -ENODEV;
 		goto err_out;
 	}
-#endif
 
 	rc = pci_request_regions (pdev, DRV_NAME);
 	if (rc)
@@ -825,28 +829,16 @@ static int __devinit rtl8139_init_board (struct pci_dev *pdev,
 	/* enable PCI bus-mastering */
 	pci_set_master (pdev);
 
-#ifdef USE_IO_OPS
-	ioaddr = ioport_map(pio_start, pio_len);
-	if (!ioaddr) {
-		dev_err(&pdev->dev, "cannot map PIO, aborting\n");
-		rc = -EIO;
-		goto err_out;
-	}
-	dev->base_addr = pio_start;
-	tp->mmio_addr = ioaddr;
-	tp->regs_len = pio_len;
-#else
-	/* ioremap MMIO region */
-	ioaddr = pci_iomap(pdev, 1, 0);
+	/* iomap MMIO region */
+	ioaddr = pci_iomap(pdev, use_pio ? rtl_pio_bar : rtl_mmio_bar, 0);
 	if (ioaddr == NULL) {
-		dev_err(&pdev->dev, "cannot remap MMIO, aborting\n");
+		dev_err(&pdev->dev, "cannot map I/O regions, aborting\n");
 		rc = -EIO;
 		goto err_out;
 	}
 	dev->base_addr = (long) ioaddr;
 	tp->mmio_addr = ioaddr;
 	tp->regs_len = mmio_len;
-#endif /* USE_IO_OPS */
 
 	/* Bring old chips out of low-power mode. */
 	RTL_W8 (HltClk, 'R');

^ permalink raw reply related

* [PATCH 2/3] 8139too: clean up register defines and structs
From: Jeff Garzik @ 2007-08-08  6:45 UTC (permalink / raw)
  To: netdev; +Cc: LKML
In-Reply-To: <20070808064440.GA16595@havoc.gtf.org>


commit 23b0a2f0f4e98a23f6fdf5b7ae4a4d60ca0cc3e2
Author: Jeff Garzik <jeff@garzik.org>
Date:   Wed Aug 8 02:28:46 2007 -0400

    [netdrvr] 8139too: tab-align enums and structs; remove dead code
    
    * (main change) tab-align hardware register value enums, and hw struct
    
    * MMIO_FLUSH_AUDIT_COMPLETE has been defined to 1 for a while.  Remove
      the code activated when it is set to zero.
    
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

 drivers/net/8139too.c |  340 ++++++++++++++++++++++++--------------------------
 1 file changed, 167 insertions(+), 173 deletions(-)

23b0a2f0f4e98a23f6fdf5b7ae4a4d60ca0cc3e2
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index 0198f74..87ecaf8 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -291,198 +291,197 @@ static struct {
 
 /* Symbolic offsets to registers. */
 enum RTL8139_registers {
-	MAC0 = 0,		/* Ethernet hardware address. */
-	MAR0 = 8,		/* Multicast filter. */
-	TxStatus0 = 0x10,	/* Transmit status (Four 32bit registers). */
-	TxAddr0 = 0x20,		/* Tx descriptors (also four 32bit). */
-	RxBuf = 0x30,
-	ChipCmd = 0x37,
-	RxBufPtr = 0x38,
-	RxBufAddr = 0x3A,
-	IntrMask = 0x3C,
-	IntrStatus = 0x3E,
-	TxConfig = 0x40,
-	RxConfig = 0x44,
-	Timer = 0x48,		/* A general-purpose counter. */
-	RxMissed = 0x4C,	/* 24 bits valid, write clears. */
-	Cfg9346 = 0x50,
-	Config0 = 0x51,
-	Config1 = 0x52,
-	FlashReg = 0x54,
-	MediaStatus = 0x58,
-	Config3 = 0x59,
-	Config4 = 0x5A,		/* absent on RTL-8139A */
-	HltClk = 0x5B,
-	MultiIntr = 0x5C,
-	TxSummary = 0x60,
-	BasicModeCtrl = 0x62,
-	BasicModeStatus = 0x64,
-	NWayAdvert = 0x66,
-	NWayLPAR = 0x68,
-	NWayExpansion = 0x6A,
+	MAC0		= 0,	 /* Ethernet hardware address. */
+	MAR0		= 8,	 /* Multicast filter. */
+	TxStatus0	= 0x10,	 /* Transmit status (Four 32bit registers). */
+	TxAddr0		= 0x20,	 /* Tx descriptors (also four 32bit). */
+	RxBuf		= 0x30,
+	ChipCmd		= 0x37,
+	RxBufPtr	= 0x38,
+	RxBufAddr	= 0x3A,
+	IntrMask	= 0x3C,
+	IntrStatus	= 0x3E,
+	TxConfig	= 0x40,
+	RxConfig	= 0x44,
+	Timer		= 0x48,	 /* A general-purpose counter. */
+	RxMissed	= 0x4C,  /* 24 bits valid, write clears. */
+	Cfg9346		= 0x50,
+	Config0		= 0x51,
+	Config1		= 0x52,
+	FlashReg	= 0x54,
+	MediaStatus	= 0x58,
+	Config3		= 0x59,
+	Config4		= 0x5A,	 /* absent on RTL-8139A */
+	HltClk		= 0x5B,
+	MultiIntr	= 0x5C,
+	TxSummary	= 0x60,
+	BasicModeCtrl	= 0x62,
+	BasicModeStatus	= 0x64,
+	NWayAdvert	= 0x66,
+	NWayLPAR	= 0x68,
+	NWayExpansion	= 0x6A,
 	/* Undocumented registers, but required for proper operation. */
-	FIFOTMS = 0x70,		/* FIFO Control and test. */
-	CSCR = 0x74,		/* Chip Status and Configuration Register. */
-	PARA78 = 0x78,
-	PARA7c = 0x7c,		/* Magic transceiver parameter register. */
-	Config5 = 0xD8,		/* absent on RTL-8139A */
+	FIFOTMS		= 0x70,	 /* FIFO Control and test. */
+	CSCR		= 0x74,	 /* Chip Status and Configuration Register. */
+	PARA78		= 0x78,
+	PARA7c		= 0x7c,	 /* Magic transceiver parameter register. */
+	Config5		= 0xD8,	 /* absent on RTL-8139A */
 };
 
 enum ClearBitMasks {
-	MultiIntrClear = 0xF000,
-	ChipCmdClear = 0xE2,
-	Config1Clear = (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1),
+	MultiIntrClear	= 0xF000,
+	ChipCmdClear	= 0xE2,
+	Config1Clear	= (1<<7)|(1<<6)|(1<<3)|(1<<2)|(1<<1),
 };
 
 enum ChipCmdBits {
-	CmdReset = 0x10,
-	CmdRxEnb = 0x08,
-	CmdTxEnb = 0x04,
-	RxBufEmpty = 0x01,
+	CmdReset	= 0x10,
+	CmdRxEnb	= 0x08,
+	CmdTxEnb	= 0x04,
+	RxBufEmpty	= 0x01,
 };
 
 /* Interrupt register bits, using my own meaningful names. */
 enum IntrStatusBits {
-	PCIErr = 0x8000,
-	PCSTimeout = 0x4000,
-	RxFIFOOver = 0x40,
-	RxUnderrun = 0x20,
-	RxOverflow = 0x10,
-	TxErr = 0x08,
-	TxOK = 0x04,
-	RxErr = 0x02,
-	RxOK = 0x01,
-
-	RxAckBits = RxFIFOOver | RxOverflow | RxOK,
+	PCIErr		= 0x8000,
+	PCSTimeout	= 0x4000,
+	RxFIFOOver	= 0x40,
+	RxUnderrun	= 0x20,
+	RxOverflow	= 0x10,
+	TxErr		= 0x08,
+	TxOK		= 0x04,
+	RxErr		= 0x02,
+	RxOK		= 0x01,
+
+	RxAckBits	= RxFIFOOver | RxOverflow | RxOK,
 };
 
 enum TxStatusBits {
-	TxHostOwns = 0x2000,
-	TxUnderrun = 0x4000,
-	TxStatOK = 0x8000,
-	TxOutOfWindow = 0x20000000,
-	TxAborted = 0x40000000,
-	TxCarrierLost = 0x80000000,
+	TxHostOwns	= 0x2000,
+	TxUnderrun	= 0x4000,
+	TxStatOK	= 0x8000,
+	TxOutOfWindow	= 0x20000000,
+	TxAborted	= 0x40000000,
+	TxCarrierLost	= 0x80000000,
 };
 enum RxStatusBits {
-	RxMulticast = 0x8000,
-	RxPhysical = 0x4000,
-	RxBroadcast = 0x2000,
-	RxBadSymbol = 0x0020,
-	RxRunt = 0x0010,
-	RxTooLong = 0x0008,
-	RxCRCErr = 0x0004,
-	RxBadAlign = 0x0002,
-	RxStatusOK = 0x0001,
+	RxMulticast	= 0x8000,
+	RxPhysical	= 0x4000,
+	RxBroadcast	= 0x2000,
+	RxBadSymbol	= 0x0020,
+	RxRunt		= 0x0010,
+	RxTooLong	= 0x0008,
+	RxCRCErr	= 0x0004,
+	RxBadAlign	= 0x0002,
+	RxStatusOK	= 0x0001,
 };
 
 /* Bits in RxConfig. */
 enum rx_mode_bits {
-	AcceptErr = 0x20,
-	AcceptRunt = 0x10,
-	AcceptBroadcast = 0x08,
-	AcceptMulticast = 0x04,
-	AcceptMyPhys = 0x02,
-	AcceptAllPhys = 0x01,
+	AcceptErr	= 0x20,
+	AcceptRunt	= 0x10,
+	AcceptBroadcast	= 0x08,
+	AcceptMulticast	= 0x04,
+	AcceptMyPhys	= 0x02,
+	AcceptAllPhys	= 0x01,
 };
 
 /* Bits in TxConfig. */
 enum tx_config_bits {
-
         /* Interframe Gap Time. Only TxIFG96 doesn't violate IEEE 802.3 */
-        TxIFGShift = 24,
-        TxIFG84 = (0 << TxIFGShift),    /* 8.4us / 840ns (10 / 100Mbps) */
-        TxIFG88 = (1 << TxIFGShift),    /* 8.8us / 880ns (10 / 100Mbps) */
-        TxIFG92 = (2 << TxIFGShift),    /* 9.2us / 920ns (10 / 100Mbps) */
-        TxIFG96 = (3 << TxIFGShift),    /* 9.6us / 960ns (10 / 100Mbps) */
-
-	TxLoopBack = (1 << 18) | (1 << 17), /* enable loopback test mode */
-	TxCRC = (1 << 16),	/* DISABLE appending CRC to end of Tx packets */
-	TxClearAbt = (1 << 0),	/* Clear abort (WO) */
-	TxDMAShift = 8,		/* DMA burst value (0-7) is shifted this many bits */
-	TxRetryShift = 4,	/* TXRR value (0-15) is shifted this many bits */
-
-	TxVersionMask = 0x7C800000, /* mask out version bits 30-26, 23 */
+        TxIFGShift	= 24,
+        TxIFG84		= (0 << TxIFGShift), /* 8.4us / 840ns (10 / 100Mbps) */
+        TxIFG88		= (1 << TxIFGShift), /* 8.8us / 880ns (10 / 100Mbps) */
+        TxIFG92		= (2 << TxIFGShift), /* 9.2us / 920ns (10 / 100Mbps) */
+        TxIFG96		= (3 << TxIFGShift), /* 9.6us / 960ns (10 / 100Mbps) */
+
+	TxLoopBack	= (1 << 18) | (1 << 17), /* enable loopback test mode */
+	TxCRC		= (1 << 16),	/* DISABLE Tx pkt CRC append */
+	TxClearAbt	= (1 << 0),	/* Clear abort (WO) */
+	TxDMAShift	= 8, /* DMA burst value (0-7) is shifted X many bits */
+	TxRetryShift	= 4, /* TXRR value (0-15) is shifted X many bits */
+
+	TxVersionMask	= 0x7C800000, /* mask out version bits 30-26, 23 */
 };
 
 /* Bits in Config1 */
 enum Config1Bits {
-	Cfg1_PM_Enable = 0x01,
-	Cfg1_VPD_Enable = 0x02,
-	Cfg1_PIO = 0x04,
-	Cfg1_MMIO = 0x08,
-	LWAKE = 0x10,		/* not on 8139, 8139A */
+	Cfg1_PM_Enable	= 0x01,
+	Cfg1_VPD_Enable	= 0x02,
+	Cfg1_PIO	= 0x04,
+	Cfg1_MMIO	= 0x08,
+	LWAKE		= 0x10,		/* not on 8139, 8139A */
 	Cfg1_Driver_Load = 0x20,
-	Cfg1_LED0 = 0x40,
-	Cfg1_LED1 = 0x80,
-	SLEEP = (1 << 1),	/* only on 8139, 8139A */
-	PWRDN = (1 << 0),	/* only on 8139, 8139A */
+	Cfg1_LED0	= 0x40,
+	Cfg1_LED1	= 0x80,
+	SLEEP		= (1 << 1),	/* only on 8139, 8139A */
+	PWRDN		= (1 << 0),	/* only on 8139, 8139A */
 };
 
 /* Bits in Config3 */
 enum Config3Bits {
-	Cfg3_FBtBEn    = (1 << 0), /* 1 = Fast Back to Back */
-	Cfg3_FuncRegEn = (1 << 1), /* 1 = enable CardBus Function registers */
-	Cfg3_CLKRUN_En = (1 << 2), /* 1 = enable CLKRUN */
-	Cfg3_CardB_En  = (1 << 3), /* 1 = enable CardBus registers */
-	Cfg3_LinkUp    = (1 << 4), /* 1 = wake up on link up */
-	Cfg3_Magic     = (1 << 5), /* 1 = wake up on Magic Packet (tm) */
-	Cfg3_PARM_En   = (1 << 6), /* 0 = software can set twister parameters */
-	Cfg3_GNTSel    = (1 << 7), /* 1 = delay 1 clock from PCI GNT signal */
+	Cfg3_FBtBEn   	= (1 << 0), /* 1	= Fast Back to Back */
+	Cfg3_FuncRegEn	= (1 << 1), /* 1	= enable CardBus Function registers */
+	Cfg3_CLKRUN_En	= (1 << 2), /* 1	= enable CLKRUN */
+	Cfg3_CardB_En 	= (1 << 3), /* 1	= enable CardBus registers */
+	Cfg3_LinkUp   	= (1 << 4), /* 1	= wake up on link up */
+	Cfg3_Magic    	= (1 << 5), /* 1	= wake up on Magic Packet (tm) */
+	Cfg3_PARM_En  	= (1 << 6), /* 0	= software can set twister parameters */
+	Cfg3_GNTSel   	= (1 << 7), /* 1	= delay 1 clock from PCI GNT signal */
 };
 
 /* Bits in Config4 */
 enum Config4Bits {
-	LWPTN = (1 << 2),	/* not on 8139, 8139A */
+	LWPTN	= (1 << 2),	/* not on 8139, 8139A */
 };
 
 /* Bits in Config5 */
 enum Config5Bits {
-	Cfg5_PME_STS     = (1 << 0), /* 1 = PCI reset resets PME_Status */
-	Cfg5_LANWake     = (1 << 1), /* 1 = enable LANWake signal */
-	Cfg5_LDPS        = (1 << 2), /* 0 = save power when link is down */
-	Cfg5_FIFOAddrPtr = (1 << 3), /* Realtek internal SRAM testing */
-	Cfg5_UWF         = (1 << 4), /* 1 = accept unicast wakeup frame */
-	Cfg5_MWF         = (1 << 5), /* 1 = accept multicast wakeup frame */
-	Cfg5_BWF         = (1 << 6), /* 1 = accept broadcast wakeup frame */
+	Cfg5_PME_STS   	= (1 << 0), /* 1	= PCI reset resets PME_Status */
+	Cfg5_LANWake   	= (1 << 1), /* 1	= enable LANWake signal */
+	Cfg5_LDPS      	= (1 << 2), /* 0	= save power when link is down */
+	Cfg5_FIFOAddrPtr= (1 << 3), /* Realtek internal SRAM testing */
+	Cfg5_UWF        = (1 << 4), /* 1 = accept unicast wakeup frame */
+	Cfg5_MWF        = (1 << 5), /* 1 = accept multicast wakeup frame */
+	Cfg5_BWF        = (1 << 6), /* 1 = accept broadcast wakeup frame */
 };
 
 enum RxConfigBits {
 	/* rx fifo threshold */
-	RxCfgFIFOShift = 13,
-	RxCfgFIFONone = (7 << RxCfgFIFOShift),
+	RxCfgFIFOShift	= 13,
+	RxCfgFIFONone	= (7 << RxCfgFIFOShift),
 
 	/* Max DMA burst */
-	RxCfgDMAShift = 8,
+	RxCfgDMAShift	= 8,
 	RxCfgDMAUnlimited = (7 << RxCfgDMAShift),
 
 	/* rx ring buffer length */
-	RxCfgRcv8K = 0,
-	RxCfgRcv16K = (1 << 11),
-	RxCfgRcv32K = (1 << 12),
-	RxCfgRcv64K = (1 << 11) | (1 << 12),
+	RxCfgRcv8K	= 0,
+	RxCfgRcv16K	= (1 << 11),
+	RxCfgRcv32K	= (1 << 12),
+	RxCfgRcv64K	= (1 << 11) | (1 << 12),
 
 	/* Disable packet wrap at end of Rx buffer. (not possible with 64k) */
-	RxNoWrap = (1 << 7),
+	RxNoWrap	= (1 << 7),
 };
 
 /* Twister tuning parameters from RealTek.
    Completely undocumented, but required to tune bad links on some boards. */
 enum CSCRBits {
-	CSCR_LinkOKBit = 0x0400,
-	CSCR_LinkChangeBit = 0x0800,
-	CSCR_LinkStatusBits = 0x0f000,
-	CSCR_LinkDownOffCmd = 0x003c0,
-	CSCR_LinkDownCmd = 0x0f3c0,
+	CSCR_LinkOKBit		= 0x0400,
+	CSCR_LinkChangeBit	= 0x0800,
+	CSCR_LinkStatusBits	= 0x0f000,
+	CSCR_LinkDownOffCmd	= 0x003c0,
+	CSCR_LinkDownCmd	= 0x0f3c0,
 };
 
 enum Cfg9346Bits {
-	Cfg9346_Lock = 0x00,
-	Cfg9346_Unlock = 0xC0,
+	Cfg9346_Lock	= 0x00,
+	Cfg9346_Unlock	= 0xC0,
 };
 
 typedef enum {
-	CH_8139 = 0,
+	CH_8139	= 0,
 	CH_8139_K,
 	CH_8139A,
 	CH_8139A_G,
@@ -495,8 +494,8 @@ typedef enum {
 } chip_t;
 
 enum chip_flags {
-	HasHltClk = (1 << 0),
-	HasLWake = (1 << 1),
+	HasHltClk	= (1 << 0),
+	HasLWake	= (1 << 1),
 };
 
 #define HW_REVID(b30, b29, b28, b27, b26, b23, b22) \
@@ -569,36 +568,44 @@ struct rtl_extra_stats {
 };
 
 struct rtl8139_private {
-	void __iomem *mmio_addr;
-	int drv_flags;
-	struct pci_dev *pci_dev;
-	u32 msg_enable;
-	struct net_device_stats stats;
-	unsigned char *rx_ring;
-	unsigned int cur_rx;	/* Index into the Rx buffer of next Rx pkt. */
-	unsigned int tx_flag;
-	unsigned long cur_tx;
-	unsigned long dirty_tx;
-	unsigned char *tx_buf[NUM_TX_DESC];	/* Tx bounce buffers */
-	unsigned char *tx_bufs;	/* Tx bounce buffer region. */
-	dma_addr_t rx_ring_dma;
-	dma_addr_t tx_bufs_dma;
-	signed char phys[4];		/* MII device addresses. */
-	char twistie, twist_row, twist_col;	/* Twister tune state. */
-	unsigned int watchdog_fired : 1;
-	unsigned int default_port : 4;	/* Last dev->if_port value. */
-	unsigned int have_thread : 1;
-	spinlock_t lock;
-	spinlock_t rx_lock;
-	chip_t chipset;
-	u32 rx_config;
-	struct rtl_extra_stats xstats;
-
-	struct delayed_work thread;
-
-	struct mii_if_info mii;
-	unsigned int regs_len;
-	unsigned long fifo_copy_timeout;
+	void __iomem		*mmio_addr;
+	int			drv_flags;
+	struct pci_dev		*pci_dev;
+	u32			msg_enable;
+	struct net_device_stats	stats;
+
+	unsigned char		*rx_ring;
+	unsigned int		cur_rx;	/* RX buf index of next pkt */
+	dma_addr_t		rx_ring_dma;
+
+	unsigned int		tx_flag;
+	unsigned long		cur_tx;
+	unsigned long		dirty_tx;
+	unsigned char		*tx_buf[NUM_TX_DESC];	/* Tx bounce buffers */
+	unsigned char		*tx_bufs;	/* Tx bounce buffer region. */
+	dma_addr_t		tx_bufs_dma;
+
+	signed char		phys[4];	/* MII device addresses. */
+
+				/* Twister tune state. */
+	char			twistie, twist_row, twist_col;
+
+	unsigned int		watchdog_fired : 1;
+	unsigned int		default_port : 4; /* Last dev->if_port value. */
+	unsigned int		have_thread : 1;
+
+	spinlock_t		lock;
+	spinlock_t		rx_lock;
+
+	chip_t			chipset;
+	u32			rx_config;
+	struct rtl_extra_stats	xstats;
+
+	struct delayed_work	thread;
+
+	struct mii_if_info	mii;
+	unsigned int		regs_len;
+	unsigned long		fifo_copy_timeout;
 };
 
 MODULE_AUTHOR ("Jeff Garzik <jgarzik@pobox.com>");
@@ -646,24 +653,11 @@ static const struct ethtool_ops rtl8139_ethtool_ops;
 #define RTL_W16_F(reg, val16)	do { iowrite16 ((val16), ioaddr + (reg)); ioread16 (ioaddr + (reg)); } while (0)
 #define RTL_W32_F(reg, val32)	do { iowrite32 ((val32), ioaddr + (reg)); ioread32 (ioaddr + (reg)); } while (0)
 
-
-#define MMIO_FLUSH_AUDIT_COMPLETE 1
-#if MMIO_FLUSH_AUDIT_COMPLETE
-
 /* write MMIO register */
 #define RTL_W8(reg, val8)	iowrite8 ((val8), ioaddr + (reg))
 #define RTL_W16(reg, val16)	iowrite16 ((val16), ioaddr + (reg))
 #define RTL_W32(reg, val32)	iowrite32 ((val32), ioaddr + (reg))
 
-#else
-
-/* write MMIO register, then flush */
-#define RTL_W8		RTL_W8_F
-#define RTL_W16		RTL_W16_F
-#define RTL_W32		RTL_W32_F
-
-#endif /* MMIO_FLUSH_AUDIT_COMPLETE */
-
 /* read MMIO register */
 #define RTL_R8(reg)		ioread8 (ioaddr + (reg))
 #define RTL_R16(reg)		ioread16 (ioaddr + (reg))

^ permalink raw reply related

* [PATCH 1/3] 8139cp, 8139too: use generic DMA
From: Jeff Garzik @ 2007-08-08  6:44 UTC (permalink / raw)
  To: netdev; +Cc: LKML


commit f09934422b162e0eb7a8fd3b4b25ffc92e31541a
Author: Jeff Garzik <jeff@garzik.org>
Date:   Wed Aug 8 02:16:04 2007 -0400

    [netdrvr] 8139cp, 8139too: convert to generic DMA
    
    Signed-off-by: Jeff Garzik <jeff@garzik.org>

 drivers/net/8139cp.c  |   31 +++++++++++++++++--------------
 drivers/net/8139too.c |   20 ++++++++++----------
 2 files changed, 27 insertions(+), 24 deletions(-)

f09934422b162e0eb7a8fd3b4b25ffc92e31541a
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index a79f28c..0664701 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -560,7 +560,7 @@ rx_status_loop:
 
 		skb_reserve(new_skb, RX_OFFSET);
 
-		pci_unmap_single(cp->pdev, mapping,
+		dma_unmap_single(&cp->pdev->dev, mapping,
 				 buflen, PCI_DMA_FROMDEVICE);
 
 		/* Handle checksum offloading for incoming packets. */
@@ -571,7 +571,7 @@ rx_status_loop:
 
 		skb_put(skb, len);
 
-		mapping = pci_map_single(cp->pdev, new_skb->data, buflen,
+		mapping = dma_map_single(&cp->pdev->dev, new_skb->data, buflen,
 					 PCI_DMA_FROMDEVICE);
 		cp->rx_skb[rx_tail] = new_skb;
 
@@ -704,7 +704,7 @@ static void cp_tx (struct cp_private *cp)
 		skb = cp->tx_skb[tx_tail];
 		BUG_ON(!skb);
 
-		pci_unmap_single(cp->pdev, le64_to_cpu(txd->addr),
+		dma_unmap_single(&cp->pdev->dev, le64_to_cpu(txd->addr),
 				 le32_to_cpu(txd->opts1) & 0xffff,
 				 PCI_DMA_TODEVICE);
 
@@ -782,7 +782,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		dma_addr_t mapping;
 
 		len = skb->len;
-		mapping = pci_map_single(cp->pdev, skb->data, len, PCI_DMA_TODEVICE);
+		mapping = dma_map_single(&cp->pdev->dev, skb->data, len, PCI_DMA_TODEVICE);
 		CP_VLAN_TX_TAG(txd, vlan_tag);
 		txd->addr = cpu_to_le64(mapping);
 		wmb();
@@ -818,7 +818,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 		 */
 		first_eor = eor;
 		first_len = skb_headlen(skb);
-		first_mapping = pci_map_single(cp->pdev, skb->data,
+		first_mapping = dma_map_single(&cp->pdev->dev, skb->data,
 					       first_len, PCI_DMA_TODEVICE);
 		cp->tx_skb[entry] = skb;
 		entry = NEXT_TX(entry);
@@ -830,7 +830,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev)
 			dma_addr_t mapping;
 
 			len = this_frag->size;
-			mapping = pci_map_single(cp->pdev,
+			mapping = dma_map_single(&cp->pdev->dev,
 						 ((void *) page_address(this_frag->page) +
 						  this_frag->page_offset),
 						 len, PCI_DMA_TODEVICE);
@@ -1069,8 +1069,8 @@ static int cp_refill_rx (struct cp_private *cp)
 
 		skb_reserve(skb, RX_OFFSET);
 
-		mapping = pci_map_single(cp->pdev, skb->data, cp->rx_buf_sz,
-					 PCI_DMA_FROMDEVICE);
+		mapping = dma_map_single(&cp->pdev->dev, skb->data,
+					 cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 		cp->rx_skb[i] = skb;
 
 		cp->rx_ring[i].opts2 = 0;
@@ -1110,7 +1110,8 @@ static int cp_alloc_rings (struct cp_private *cp)
 {
 	void *mem;
 
-	mem = pci_alloc_consistent(cp->pdev, CP_RING_BYTES, &cp->ring_dma);
+	mem = dma_alloc_coherent(&cp->pdev->dev, CP_RING_BYTES,
+				 &cp->ring_dma, GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
@@ -1128,7 +1129,7 @@ static void cp_clean_rings (struct cp_private *cp)
 	for (i = 0; i < CP_RX_RING_SIZE; i++) {
 		if (cp->rx_skb[i]) {
 			desc = cp->rx_ring + i;
-			pci_unmap_single(cp->pdev, le64_to_cpu(desc->addr),
+			dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
 					 cp->rx_buf_sz, PCI_DMA_FROMDEVICE);
 			dev_kfree_skb(cp->rx_skb[i]);
 		}
@@ -1139,7 +1140,7 @@ static void cp_clean_rings (struct cp_private *cp)
 			struct sk_buff *skb = cp->tx_skb[i];
 
 			desc = cp->tx_ring + i;
-			pci_unmap_single(cp->pdev, le64_to_cpu(desc->addr),
+			dma_unmap_single(&cp->pdev->dev,le64_to_cpu(desc->addr),
 					 le32_to_cpu(desc->opts1) & 0xffff,
 					 PCI_DMA_TODEVICE);
 			if (le32_to_cpu(desc->opts1) & LastFrag)
@@ -1158,7 +1159,8 @@ static void cp_clean_rings (struct cp_private *cp)
 static void cp_free_rings (struct cp_private *cp)
 {
 	cp_clean_rings(cp);
-	pci_free_consistent(cp->pdev, CP_RING_BYTES, cp->rx_ring, cp->ring_dma);
+	dma_free_coherent(&cp->pdev->dev, CP_RING_BYTES, cp->rx_ring,
+			  cp->ring_dma);
 	cp->rx_ring = NULL;
 	cp->tx_ring = NULL;
 }
@@ -1517,7 +1519,8 @@ static void cp_get_ethtool_stats (struct net_device *dev,
 	dma_addr_t dma;
 	int i;
 
-	nic_stats = pci_alloc_consistent(cp->pdev, sizeof(*nic_stats), &dma);
+	nic_stats = dma_alloc_coherent(&cp->pdev->dev, sizeof(*nic_stats),
+				       &dma, GFP_KERNEL);
 	if (!nic_stats)
 		return;
 
@@ -1552,7 +1555,7 @@ static void cp_get_ethtool_stats (struct net_device *dev,
 	tmp_stats[i++] = cp->cp_stats.rx_frags;
 	BUG_ON(i != CP_NUM_STATS);
 
-	pci_free_consistent(cp->pdev, sizeof(*nic_stats), nic_stats, dma);
+	dma_free_coherent(&cp->pdev->dev, sizeof(*nic_stats), nic_stats, dma);
 }
 
 static const struct ethtool_ops cp_ethtool_ops = {
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index f4e4298..0198f74 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -1314,18 +1314,18 @@ static int rtl8139_open (struct net_device *dev)
 	if (retval)
 		return retval;
 
-	tp->tx_bufs = pci_alloc_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
-					   &tp->tx_bufs_dma);
-	tp->rx_ring = pci_alloc_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
-					   &tp->rx_ring_dma);
+	tp->tx_bufs = dma_alloc_coherent(&tp->pci_dev->dev, TX_BUF_TOT_LEN,
+					   &tp->tx_bufs_dma, GFP_KERNEL);
+	tp->rx_ring = dma_alloc_coherent(&tp->pci_dev->dev, RX_BUF_TOT_LEN,
+					   &tp->rx_ring_dma, GFP_KERNEL);
 	if (tp->tx_bufs == NULL || tp->rx_ring == NULL) {
 		free_irq(dev->irq, dev);
 
 		if (tp->tx_bufs)
-			pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
+			dma_free_coherent(&tp->pci_dev->dev, TX_BUF_TOT_LEN,
 					    tp->tx_bufs, tp->tx_bufs_dma);
 		if (tp->rx_ring)
-			pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
+			dma_free_coherent(&tp->pci_dev->dev, RX_BUF_TOT_LEN,
 					    tp->rx_ring, tp->rx_ring_dma);
 
 		return -ENOMEM;
@@ -2248,10 +2248,10 @@ static int rtl8139_close (struct net_device *dev)
 
 	rtl8139_tx_clear (tp);
 
-	pci_free_consistent(tp->pci_dev, RX_BUF_TOT_LEN,
-			    tp->rx_ring, tp->rx_ring_dma);
-	pci_free_consistent(tp->pci_dev, TX_BUF_TOT_LEN,
-			    tp->tx_bufs, tp->tx_bufs_dma);
+	dma_free_coherent(&tp->pci_dev->dev, RX_BUF_TOT_LEN,
+			  tp->rx_ring, tp->rx_ring_dma);
+	dma_free_coherent(&tp->pci_dev->dev, TX_BUF_TOT_LEN,
+			  tp->tx_bufs, tp->tx_bufs_dma);
 	tp->rx_ring = NULL;
 	tp->tx_bufs = NULL;
 

^ permalink raw reply related

* Re: [PATCH] drivers/net/wireless/wl3501_cs.c: remove redundant memset
From: Mariusz Kozlowski @ 2007-08-08  5:58 UTC (permalink / raw)
  To: John W. Linville
  Cc: acme-f8uhVLnGfZaxAyOMLChx1axOck334EZe, Jeff Garzik,
	netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20070807001155.GA3285-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>

> Please send wireless patches to linux-wireless-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
> and CC me.

Ok. Did you pick the patch up?

Regards,

	Mariusz

^ permalink raw reply

* Re: TCP's initial cwnd setting correct?...
From: David Miller @ 2007-08-08  5:01 UTC (permalink / raw)
  To: ilpo.jarvinen; +Cc: netdev
In-Reply-To: <Pine.LNX.4.64.0708061508070.8788@kivilampi-30.cs.helsinki.fi>

From: "Ilpo_Järvinen" <ilpo.jarvinen@helsinki.fi>
Date: Mon, 6 Aug 2007 15:37:15 +0300 (EEST)

> ...Another thing that makes me wonder, is the tp->mss_cache > 1460 check 
> as based on rfc3390 (and also it's precursor rfc2414) with up to 2190 
> bytes MSS TCP can use 3 as initial cwnd...

I did the research and my memory was at least partially right.

Below is an old bogus change of mine and the later revert with
Alexey's explanation.

This seems to be dealing with receive window calculation issues,
rather than snd_cwnd.  But they might be related and you should
consider this very seriously.

commit 6b251858d377196b8cea20e65cae60f584a42735
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Wed Sep 28 16:31:48 2005 -0700

    [TCP]: Fix init_cwnd calculations in tcp_select_initial_window()
    
    Match it up to what RFC2414 really specifies.
    Noticed by Rick Jones.
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index d6e3d26..caf2e2c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -190,15 +190,16 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	}
 
 	/* Set initial window to value enough for senders,
-	 * following RFC1414. Senders, not following this RFC,
+	 * following RFC2414. Senders, not following this RFC,
 	 * will be satisfied with 2.
 	 */
 	if (mss > (1<<*rcv_wscale)) {
-		int init_cwnd = 4;
-		if (mss > 1460*3)
+		int init_cwnd;
+
+		if (mss > 1460)
 			init_cwnd = 2;
-		else if (mss > 1460)
-			init_cwnd = 3;
+		else
+			init_cwnd = (mss > 1095) ? 3 : 4;
 		if (*rcv_wnd > init_cwnd*mss)
 			*rcv_wnd = init_cwnd*mss;
 	}
--------------------
commit 01ff367e62f0474e4d39aa5812cbe2a30d96e1e9
Author: David S. Miller <davem@sunset.davemloft.net>
Date:   Thu Sep 29 17:07:20 2005 -0700

    [TCP]: Revert 6b251858d377196b8cea20e65cae60f584a42735
    
    But retain the comment fix.
    
    Alexey Kuznetsov has explained the situation as follows:
    
    --------------------
    
    I think the fix is incorrect. Look, the RFC function init_cwnd(mss) is
    not continuous: f.e. for mss=1095 it needs initial window 1095*4, but
    for mss=1096 it is 1096*3. We do not know exactly what mss sender used
    for calculations. If we advertised 1096 (and calculate initial window
    3*1096), the sender could limit it to some value < 1096 and then it
    will need window his_mss*4 > 3*1096 to send initial burst.
    
    See?
    
    So, the honest function for initial rcv_wnd derived from
    tcp_init_cwnd() is:
    
    	init_rcv_wnd(mss)=
    	  min { init_cwnd(mss1)*mss1 for mss1 <= mss }
    
    It is something sort of:
    
    	if (mss < 1096)
    		return mss*4;
    	if (mss < 1096*2)
    		return 1096*4;
    	return mss*2;
    
    (I just scribbled a graph on a piece of paper, it is difficult to see or
    to explain without this)
    
    I selected it differently giving more window than it is strictly
    required.  Initial receive window must be large enough to allow sender
    following to the rfc (or just setting initial cwnd to 2) to send
    initial burst.  But besides that it is arbitrary, so I decided to give
    slack space of one segment.
    
    Actually, the logic was:
    
    If mss is low/normal (<=ethernet), set window to receive more than
    initial burst allowed by rfc under the worst conditions
    i.e. mss*4. This gives slack space of 1 segment for ethernet frames.
    
    For msses slightly more than ethernet frame, take 3. Try to give slack
    space of 1 frame again.
    
    If mss is huge, force 2*mss. No slack space.
    
    Value 1460*3 is really confusing. Minimal one is 1096*2, but besides
    that it is an arbitrary value. It was meant to be ~4096. 1460*3 is
    just the magic number from RFC, 1460*3 = 1095*4 is the magic :-), so
    that I guess hands typed this themselves.
    
    --------------------
    
    Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index caf2e2c..c5b911f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -194,12 +194,11 @@ void tcp_select_initial_window(int __space, __u32 mss,
 	 * will be satisfied with 2.
 	 */
 	if (mss > (1<<*rcv_wscale)) {
-		int init_cwnd;
-
-		if (mss > 1460)
+		int init_cwnd = 4;
+		if (mss > 1460*3)
 			init_cwnd = 2;
-		else
-			init_cwnd = (mss > 1095) ? 3 : 4;
+		else if (mss > 1460)
+			init_cwnd = 3;
 		if (*rcv_wnd > init_cwnd*mss)
 			*rcv_wnd = init_cwnd*mss;
 	}

^ permalink raw reply related

* [PATCH RFC]: napi_struct V6
From: David Miller @ 2007-08-08  4:54 UTC (permalink / raw)
  To: netdev; +Cc: shemminger, jgarzik, hadi, rusty


Changes since V5:

1) Revert unnecessary TX locking changes in bnx2 and tg3

2) ++i

This is probably what I'll check into my net-2.6.24 tree

Thanks.

[NET]: Make NAPI polling independent of struct net_device objects.

Several devices have multiple independent RX queues per net
device, and some have a single interrupt doorbell for several
queues.

In either case, it's easier to support layouts like that if the
structure representing the poll is independent of the net
device itself.

The signature of the ->poll() call back goes from:

	int foo_poll(struct net_device *dev, int *budget)

to

	int foo_poll(struct napi_struct *napi, int budget)

The caller is returned the number of RX packets processed (or
the number of "NAPI credits" consumed if you want to get
abstract).  The callee no longer messes around bumping
dev->quota, *budget, etc. because that is all handled in the
caller upon return.

The napi_struct is to be embedded in the device driver private data
structures.

Furthermore, it is the driver's responsibility to disable all NAPI
instances in its ->stop() device close handler.  Since the
napi_struct is privatized into the driver's private data structures,
only the driver knows how to get at all of the napi_struct instances
it may have per-device.

With lots of help and suggestions from Rusty Russell.

[ Ported to current tree and all drivers converted.  -DaveM ]

Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/Documentation/networking/NAPI_HOWTO.txt b/Documentation/networking/NAPI_HOWTO.txt
index 7907435..e39f763 100644
--- a/Documentation/networking/NAPI_HOWTO.txt
+++ b/Documentation/networking/NAPI_HOWTO.txt
@@ -60,39 +60,39 @@ A) DMA ring or enough RAM to store packets in software devices.
 B) Ability to turn off interrupts or maybe events that send packets up 
 the stack.
 
-NAPI processes packet events in what is known as dev->poll() method.
-Typically, only packet receive events are processed in dev->poll(). 
+NAPI processes packet events in what is known as napi->poll() method.
+Typically, only packet receive events are processed in napi->poll(). 
 The rest of the events MAY be processed by the regular interrupt handler 
 to reduce processing latency (justified also because there are not that 
 many of them).
-Note, however, NAPI does not enforce that dev->poll() only processes 
+Note, however, NAPI does not enforce that napi->poll() only processes 
 receive events. 
 Tests with the tulip driver indicated slightly increased latency if
-all of the interrupt handler is moved to dev->poll(). Also MII handling
+all of the interrupt handler is moved to napi->poll(). Also MII handling
 gets a little trickier.
 The example used in this document is to move the receive processing only
-to dev->poll(); this is shown with the patch for the tulip driver.
+to napi->poll(); this is shown with the patch for the tulip driver.
 For an example of code that moves all the interrupt driver to 
-dev->poll() look at the ported e1000 code.
+napi->poll() look at the ported e1000 code.
 
 There are caveats that might force you to go with moving everything to 
-dev->poll(). Different NICs work differently depending on their status/event 
+napi->poll(). Different NICs work differently depending on their status/event 
 acknowledgement setup. 
 There are two types of event register ACK mechanisms.
 	I)  what is known as Clear-on-read (COR).
 	when you read the status/event register, it clears everything!
 	The natsemi and sunbmac NICs are known to do this.
-	In this case your only choice is to move all to dev->poll()
+	In this case your only choice is to move all to napi->poll()
 
 	II) Clear-on-write (COW)
 	 i) you clear the status by writing a 1 in the bit-location you want.
 		These are the majority of the NICs and work the best with NAPI.
-		Put only receive events in dev->poll(); leave the rest in
+		Put only receive events in napi->poll(); leave the rest in
 		the old interrupt handler.
 	 ii) whatever you write in the status register clears every thing ;->
 		Cant seem to find any supported by Linux which do this. If
 		someone knows such a chip email us please.
-		Move all to dev->poll()
+		Move all to napi->poll()
 
 C) Ability to detect new work correctly.
 NAPI works by shutting down event interrupts when there's work and
@@ -110,56 +110,64 @@ discussion.
 Locking rules and environmental guarantees
 ==========================================
 
--Guarantee: Only one CPU at any time can call dev->poll(); this is because
+-Guarantee: Only one CPU at any time can call napi->poll(); this is because
 only one CPU can pick the initial interrupt and hence the initial
-netif_rx_schedule(dev);
+netif_rx_schedule(dev, napi);
 - The core layer invokes devices to send packets in a round robin format.
 This implies receive is totally lockless because of the guarantee that only 
 one CPU is executing it.
 -  contention can only be the result of some other CPU accessing the rx
 ring. This happens only in close() and suspend() (when these methods
-try to clean the rx ring); 
-****guarantee: driver authors need not worry about this; synchronization 
-is taken care for them by the top net layer.
--local interrupts are enabled (if you dont move all to dev->poll()). For 
+try to clean the rx ring);  Therefore the close() and suspend() methods
+must take care to invoke napi_disable() on each NAPI instance associated
+with the driver.
+-local interrupts are enabled (if you dont move all to napi->poll()). For 
 example link/MII and txcomplete continue functioning just same old way. 
 This improves the latency of processing these events. It is also assumed that 
 the receive interrupt is the largest cause of noise. Note this might not 
 always be true. 
 [according to Manfred Spraul, the winbond insists on sending one 
 txmitcomplete interrupt for each packet (although this can be mitigated)].
-For these broken drivers, move all to dev->poll().
+For these broken drivers, move all to napi->poll().
 
-For the rest of this text, we'll assume that dev->poll() only
+For the rest of this text, we'll assume that napi->poll() only
 processes receive events.
 
 new methods introduce by NAPI
 =============================
 
-a) netif_rx_schedule(dev)
-Called by an IRQ handler to schedule a poll for device
+a) netif_rx_schedule(dev, napi)
+Called by an IRQ handler to schedule a poll on the given NAPI instance
+for device
 
-b) netif_rx_schedule_prep(dev)
-puts the device in a state which allows for it to be added to the
-CPU polling list if it is up and running. You can look at this as
-the first half of  netif_rx_schedule(dev) above; the second half
-being c) below.
+b) netif_rx_schedule_prep(dev, napi)
+puts given NAPI instance of the device in a state which allows for it
+to be added to the CPU polling list if the device is up and running.
+You can look at this as the first half of netif_rx_schedule(dev, napi)
+above; the second half being c) below.
 
-c) __netif_rx_schedule(dev)
-Add device to the poll list for this CPU; assuming that _prep above
-has already been called and returned 1.
+c) __netif_rx_schedule(dev, napi)
+Add NAPI instance of device to the poll list for this CPU; assuming
+that _prep above has already been called and returned 1.
 
-d) netif_rx_reschedule(dev, undo)
-Called to reschedule polling for device specifically for some
-deficient hardware. Read Appendix 2 for more details.
+d) netif_rx_reschedule(dev, napi)
+Called to reschedule polling for a NAPI instance of a device specifically
+for some deficient hardware. Read Appendix 2 for more details.
 
-e) netif_rx_complete(dev)
-
-Remove interface from the CPU poll list: it must be in the poll list
-on current cpu. This primitive is called by dev->poll(), when
+e) netif_rx_complete(dev, napi)
+Remove NAPI instance of device from the CPU poll list: it must be in the
+poll list on the current cpu. This primitive is called by napi->poll(), when
 it completes its work. The device cannot be out of poll list at this
 call, if it is then clearly it is a BUG(). You'll know ;->
 
+f) netif_napi_add(dev, napi, poll, weight)
+Register a NAPI instance for the given device, using the given
+'poll' handler and device weight 'weight'.  For most devices, a
+"struct napi_struct" will be embedded in the netdevice private
+structure, and given as the second argument here.  For more complicated
+devices with multiple receive queues, a separate NAPI struct instance
+will be allocated and added for each such queue.
+
 All of the above methods are used below, so keep reading for clarity.
 
 Device driver changes to be made when porting NAPI
@@ -167,54 +175,49 @@ Device driver changes to be made when porting NAPI
 
 Below we describe what kind of changes are required for NAPI to work.
 
-1) introduction of dev->poll() method 
+1) introduction of napi->poll() method 
 =====================================
 
 This is the method that is invoked by the network core when it requests
-for new packets from the driver. A driver is allowed to send upto
-dev->quota packets by the current CPU before yielding to the network
-subsystem (so other devices can also get opportunity to send to the stack).
+for new packets from the driver for a specific NAPI instance. A driver
+is allowed to send up to 'budget' packets by the current CPU before yielding
+to the network subsystem (so other devices can also get opportunity to send
+to the stack).
 
-dev->poll() prototype looks as follows:
-int my_poll(struct net_device *dev, int *budget)
+napi->poll() prototype looks as follows:
+
+int my_poll(struct napi_struct *napi, int budget)
 
 budget is the remaining number of packets the network subsystem on the
 current CPU can send up the stack before yielding to other system tasks.
-*Each driver is responsible for decrementing budget by the total number of
-packets sent.
-	Total number of packets cannot exceed dev->quota.
 
-dev->poll() method is invoked by the top layer, the driver just sends if it 
-can to the stack the packet quantity requested.
+The napi->poll() method is invoked by the top layer, the driver just sends
+if it can to the stack the packet quantity requested.
 
-more on dev->poll() below after the interrupt changes are explained.
+more on napi->poll() below after the interrupt changes are explained.
 
-2) registering dev->poll() method
+2) registering napi->poll() method
 ===================================
 
-dev->poll should be set in the dev->probe() method. 
+napi->poll should be registered in the dev->probe() method via
+netif_napi_add() as explained above.
+
 e.g:
 dev->open = my_open;
 .
 .
-/* two new additions */
-/* first register my poll method */
-dev->poll = my_poll;
-/* next register my weight/quanta; can be overridden in /proc */
-dev->weight = 16;
+netif_napi_add(dev, &private->napi, my_poll, 16);
 .
 .
 dev->stop = my_close;
 
-
-
-3) scheduling dev->poll()
+3) scheduling napi->poll()
 =============================
 This involves modifying the interrupt handler and the code
 path which takes the packet off the NIC and sends them to the 
 stack.
 
-it's important at this point to introduce the classical D Becker 
+It's important at this point to introduce the classical D Becker 
 interrupt processor:
 
 ------------------
@@ -296,13 +299,13 @@ netdevice_interrupt(int irq, void *dev_id, struct pt_regs *regs)
 		}
 /************************ start note *********************************/		
 		if (status & rx_interrupt || (status & rx_nobuffs)) {
-			if (netif_rx_schedule_prep(dev)) {
+			if (netif_rx_schedule_prep(dev, &tp->napi)) {
 
 				/* disable interrupts caused 
 			         *	by arriving packets */
 				disable_rx_and_rxnobuff_ints();
 				/* tell system we have work to be done. */
-				__netif_rx_schedule(dev);
+				__netif_rx_schedule(dev, &tp->napi);
 			} else {
 				printk("driver bug! interrupt while in poll\n");
 				/* FIX by disabling interrupts  */
@@ -344,10 +347,10 @@ register for those two items above; clearing is done in the place where
 proper work is done within NAPI; at the poll() and refill_rx_ring() 
 discussed further below.
 netif_rx_schedule_prep() returns 1 if device is in running state and
-gets successfully added to the core poll list. If we get a zero value
-we can _almost_ assume are already added to the list (instead of not running. 
-Logic based on the fact that you shouldn't get interrupt if not running)
-We rectify this by disabling rx and rxnobuf interrupts.
+the NAPI instance gets successfully added to the core poll list. If we get
+a zero value we can _almost_ assume we are already added to the list (instead
+of not running.  Logic based on the fact that you shouldn't get interrupt
+if not running) We rectify this by disabling rx and rxnobuf interrupts.
 
 II) that receive_packets(dev) and make_rx_buffs_avail() may have disappeared.
 These functionalities are still around actually......
@@ -355,7 +358,7 @@ These functionalities are still around actually......
 infact, receive_packets(dev) is very close to my_poll() and 
 make_rx_buffs_avail() is invoked from my_poll()
 
-4) converting receive_packets() to dev->poll()
+4) converting receive_packets() to napi->poll()
 ===============================================
 
 We need to convert the classical D Becker receive_packets(dev) to my_poll()
@@ -430,18 +433,15 @@ the call.
 -------------------------------------------------------------------
 
 /* this is called by the network core */
-static int my_poll (struct net_device *dev, int *budget)
+static int my_poll (struct napi_struct *napi, int budget)
 {
-
-	struct my_private *tp = (struct my_private *)dev->priv;
+	struct my_private *tp = container_of(napi, struct my_private, napi);
+	struct net_device *dev = tp->dev;
 	rx_ring = tp->rx_ring;
 	cur_rx = tp->cur_rx;
 	int entry = cur_rx % RX_BUF_LEN;
-	/* maximum packets to send to the stack */
-/************************ note note *********************************/		
-	int rx_work_limit = dev->quota;
+	int work_done = 0;
 
-/************************ end note note *********************************/		
     do {  // outer beginning loop starts here
 
 	clear_rx_status_register_bit();
@@ -461,11 +461,11 @@ static int my_poll (struct net_device *dev, int *budget)
                 if ((rx_size > (MAX_ETH_FRAME_SIZE+4)) ||
                     (!(rx_status & RxStatusOK))) {
                         netdrv_rx_err (rx_status, dev, tp, ioaddr);
-                        return 1;
+                        return work_done;
                 }
 
 /************************ note note *********************************/		
-                if (--rx_work_limit < 0) { /* we got packets, but no quota */
+		if (work_done >= budget) { /* we got packets, but no quota */
 			/* store current ring pointer state */
 			tp->cur_rx = cur_rx;
 
@@ -492,7 +492,7 @@ static int my_poll (struct net_device *dev, int *budget)
 
 		/* move to the next skb on the ring */
 		entry = (++tp->cur_rx) % RX_RING_SIZE;
-		received++ ;
+		work_done++;
 
         }
 
@@ -518,17 +518,13 @@ static int my_poll (struct net_device *dev, int *budget)
 
 done:
 
-/************************ note note *********************************/		
-        dev->quota -= received;
-        *budget -= received;
-
         /* If RX ring is not full we are out of memory. */
         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
                 goto oom;
 
 	/* we are happy/done, no more packets on ring; put us back
 	to where we can start processing interrupts again */
-        netif_rx_complete(dev);
+        netif_rx_complete(dev, napi);
 	enable_rx_and_rxnobuf_ints();
 
        /* The last op happens after poll completion. Which means the following:
@@ -544,20 +540,14 @@ done:
         * processed irqs. The good news: no events are ever lost.
         */
 
-        return 0;   /* done */
+        return work_done;   /* done */
 
 not_done:
         if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
             tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
                 refill_rx_ring(dev);
 
-        if (!received) {
-                printk("received==0\n");
-                received = 1;
-        }
-        dev->quota -= received;
-        *budget -= received;
-        return 1;  /* not_done */
+        return work_done;
 
 oom:
         /* Start timer, stop polling, but do not enable rx interrupts. */
@@ -569,7 +559,6 @@ oom:
 -------------------------------------------------------------------
 
 From above we note that:
-0) rx_work_limit = dev->quota 
 1) refill_rx_ring() is in charge of clearing the bit for rxnobuff when
 it does the work.
 2) We have a done and not_done state.
@@ -597,18 +586,12 @@ a)
         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
                 restart_timer();
 
-	else netif_rx_schedule(dev);  /* we are back on the poll list */
+	else netif_rx_schedule(dev, napi);  /* we are back on the poll list */
 	
 5) dev->close() and dev->suspend() issues
 ==========================================
-The driver writer needn't worry about this; the top net layer takes
-care of it.
-
-6) Adding new Stats to /proc 
-=============================
-In order to debug some of the new features, we introduce new stats
-that need to be collected.
-TODO: Fill this later.
+These handlers should shut off all NAPI instances for a given device using
+napi_disable(napi)
 
 APPENDIX 1: discussion on using ethernet HW FC
 ==============================================
@@ -708,8 +691,8 @@ restart_poll:
 	.
 	.
 	enable_rx_interrupts()
-	netif_rx_complete(dev);
-	if (ring_has_new_packet() && netif_rx_reschedule(dev, received)) {
+	netif_rx_complete(dev, napi);
+	if (ring_has_new_packet() && netif_rx_reschedule(dev, napi)) {
 		disable_rx_and_rxnobufs()
 		goto restart_poll
 	} while (rx_status_is_set);
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 285c143..35f3ca4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -228,6 +228,8 @@ struct ipoib_dev_priv {
 
 	struct net_device *dev;
 
+	struct napi_struct napi;
+
 	unsigned long flags;
 
 	struct mutex mcast_mutex;
@@ -351,7 +353,7 @@ extern struct workqueue_struct *ipoib_workqueue;
 
 /* functions */
 
-int ipoib_poll(struct net_device *dev, int *budget);
+int ipoib_poll(struct napi_struct *napi, int budget);
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
 
 struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 1094488..cfbcb3c 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -281,20 +281,18 @@ static void ipoib_ib_handle_tx_wc(struct net_device *dev, struct ib_wc *wc)
 			   wc->status, wr_id, wc->vendor_err);
 }
 
-int ipoib_poll(struct net_device *dev, int *budget)
+int ipoib_poll(struct napi_struct *napi, int budget)
 {
-	struct ipoib_dev_priv *priv = netdev_priv(dev);
-	int max = min(*budget, dev->quota);
+	struct ipoib_dev_priv *priv = container_of(napi, struct ipoib_dev_priv, napi);
+	struct net_device *dev = priv->dev;
 	int done;
 	int t;
-	int empty;
 	int n, i;
 
 	done  = 0;
-	empty = 0;
 
-	while (max) {
-		t = min(IPOIB_NUM_WC, max);
+	while (done < budget) {
+		t = min(IPOIB_NUM_WC, budget - done);
 		n = ib_poll_cq(priv->cq, t, priv->ibwc);
 
 		for (i = 0; i < n; ++i) {
@@ -302,42 +300,35 @@ int ipoib_poll(struct net_device *dev, int *budget)
 
 			if (wc->wr_id & IPOIB_CM_OP_SRQ) {
 				++done;
-				--max;
 				ipoib_cm_handle_rx_wc(dev, wc);
 			} else if (wc->wr_id & IPOIB_OP_RECV) {
 				++done;
-				--max;
 				ipoib_ib_handle_rx_wc(dev, wc);
 			} else
 				ipoib_ib_handle_tx_wc(dev, wc);
 		}
 
-		if (n != t) {
-			empty = 1;
+		if (n != t)
 			break;
-		}
 	}
 
-	dev->quota -= done;
-	*budget    -= done;
-
-	if (empty) {
-		netif_rx_complete(dev);
+	if (done < budget) {
+		netif_rx_complete(dev, napi);
 		if (unlikely(ib_req_notify_cq(priv->cq,
 					      IB_CQ_NEXT_COMP |
-					      IB_CQ_REPORT_MISSED_EVENTS)) &&
-		    netif_rx_reschedule(dev, 0))
-			return 1;
-
-		return 0;
+					      IB_CQ_REPORT_MISSED_EVENTS)))
+		    netif_rx_reschedule(napi);
 	}
 
-	return 1;
+	return done;
 }
 
 void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
 {
-	netif_rx_schedule(dev_ptr);
+	struct net_device *dev = dev_ptr;
+	struct ipoib_dev_priv *priv = netdev_priv(dev);
+
+	netif_rx_schedule(dev, &priv->napi);
 }
 
 static inline int post_send(struct ipoib_dev_priv *priv,
@@ -577,7 +568,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
 	int i;
 
 	clear_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
-	netif_poll_disable(dev);
+	napi_disable(&priv->napi);
 
 	ipoib_cm_dev_stop(dev);
 
@@ -660,7 +651,7 @@ timeout:
 		msleep(1);
 	}
 
-	netif_poll_enable(dev);
+	napi_enable(&priv->napi);
 	ib_req_notify_cq(priv->cq, IB_CQ_NEXT_COMP);
 
 	return 0;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 894b1dc..8d4dad4 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -141,6 +141,8 @@ static int ipoib_stop(struct net_device *dev)
 
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
+	napi_disable(&priv->napi);
+
 	netif_stop_queue(dev);
 
 	clear_bit(IPOIB_FLAG_NETIF_STOPPED, &priv->flags);
@@ -948,8 +950,8 @@ static void ipoib_setup(struct net_device *dev)
 	dev->hard_header 	 = ipoib_hard_header;
 	dev->set_multicast_list  = ipoib_set_mcast_list;
 	dev->neigh_setup         = ipoib_neigh_setup_dev;
-	dev->poll                = ipoib_poll;
-	dev->weight              = 100;
+
+	netif_napi_add(dev, &priv->napi, ipoib_poll, 100);
 
 	dev->watchdog_timeo 	 = HZ;
 
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index a79f28c..923821d 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -334,6 +334,8 @@ struct cp_private {
 	spinlock_t		lock;
 	u32			msg_enable;
 
+	struct napi_struct	napi;
+
 	struct pci_dev		*pdev;
 	u32			rx_config;
 	u16			cpcmd;
@@ -501,12 +503,12 @@ static inline unsigned int cp_rx_csum_ok (u32 status)
 	return 0;
 }
 
-static int cp_rx_poll (struct net_device *dev, int *budget)
+static int cp_rx_poll(struct napi_struct *napi, int budget)
 {
-	struct cp_private *cp = netdev_priv(dev);
-	unsigned rx_tail = cp->rx_tail;
-	unsigned rx_work = dev->quota;
-	unsigned rx;
+	struct cp_private *cp = container_of(napi, struct cp_private, napi);
+	struct net_device *dev = cp->dev;
+	unsigned int rx_tail = cp->rx_tail;
+	int rx;
 
 rx_status_loop:
 	rx = 0;
@@ -588,33 +590,28 @@ rx_next:
 			desc->opts1 = cpu_to_le32(DescOwn | cp->rx_buf_sz);
 		rx_tail = NEXT_RX(rx_tail);
 
-		if (!rx_work--)
+		if (rx >= budget)
 			break;
 	}
 
 	cp->rx_tail = rx_tail;
 
-	dev->quota -= rx;
-	*budget -= rx;
-
 	/* if we did not reach work limit, then we're done with
 	 * this round of polling
 	 */
-	if (rx_work) {
+	if (rx < budget) {
 		unsigned long flags;
 
 		if (cpr16(IntrStatus) & cp_rx_intr_mask)
 			goto rx_status_loop;
 
-		local_irq_save(flags);
+		spin_lock_irqsave(&cp->lock, flags);
 		cpw16_f(IntrMask, cp_intr_mask);
-		__netif_rx_complete(dev);
-		local_irq_restore(flags);
-
-		return 0;	/* done */
+		__netif_rx_complete(dev, napi);
+		spin_unlock_irqrestore(&cp->lock, flags);
 	}
 
-	return 1;		/* not done */
+	return rx;
 }
 
 static irqreturn_t cp_interrupt (int irq, void *dev_instance)
@@ -647,9 +644,9 @@ static irqreturn_t cp_interrupt (int irq, void *dev_instance)
 	}
 
 	if (status & (RxOK | RxErr | RxEmpty | RxFIFOOvr))
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &cp->napi)) {
 			cpw16_f(IntrMask, cp_norx_intr_mask);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &cp->napi);
 		}
 
 	if (status & (TxOK | TxErr | TxEmpty | SWInt))
@@ -1198,6 +1195,8 @@ static int cp_close (struct net_device *dev)
 	struct cp_private *cp = netdev_priv(dev);
 	unsigned long flags;
 
+	napi_disable(&cp->napi);
+
 	if (netif_msg_ifdown(cp))
 		printk(KERN_DEBUG "%s: disabling interface\n", dev->name);
 
@@ -1933,11 +1932,10 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->hard_start_xmit = cp_start_xmit;
 	dev->get_stats = cp_get_stats;
 	dev->do_ioctl = cp_ioctl;
-	dev->poll = cp_rx_poll;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = cp_poll_controller;
 #endif
-	dev->weight = 16;	/* arbitrary? from NAPI_HOWTO.txt. */
+	netif_napi_add(dev, &cp->napi, cp_rx_poll, 16);
 #ifdef BROKEN
 	dev->change_mtu = cp_change_mtu;
 #endif
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index f4e4298..538493d 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -573,6 +573,8 @@ struct rtl8139_private {
 	int drv_flags;
 	struct pci_dev *pci_dev;
 	u32 msg_enable;
+	struct napi_struct napi;
+	struct net_device *dev;
 	struct net_device_stats stats;
 	unsigned char *rx_ring;
 	unsigned int cur_rx;	/* Index into the Rx buffer of next Rx pkt. */
@@ -625,10 +627,10 @@ static void rtl8139_tx_timeout (struct net_device *dev);
 static void rtl8139_init_ring (struct net_device *dev);
 static int rtl8139_start_xmit (struct sk_buff *skb,
 			       struct net_device *dev);
-static int rtl8139_poll(struct net_device *dev, int *budget);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void rtl8139_poll_controller(struct net_device *dev);
 #endif
+static int rtl8139_poll(struct napi_struct *napi, int budget);
 static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance);
 static int rtl8139_close (struct net_device *dev);
 static int netdev_ioctl (struct net_device *dev, struct ifreq *rq, int cmd);
@@ -963,6 +965,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 
 	assert (dev != NULL);
 	tp = netdev_priv(dev);
+	tp->dev = dev;
 
 	ioaddr = tp->mmio_addr;
 	assert (ioaddr != NULL);
@@ -976,8 +979,7 @@ static int __devinit rtl8139_init_one (struct pci_dev *pdev,
 	/* The Rtl8139-specific entries in the device structure. */
 	dev->open = rtl8139_open;
 	dev->hard_start_xmit = rtl8139_start_xmit;
-	dev->poll = rtl8139_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &tp->napi, rtl8139_poll, 64);
 	dev->stop = rtl8139_close;
 	dev->get_stats = rtl8139_get_stats;
 	dev->set_multicast_list = rtl8139_set_rx_mode;
@@ -2103,39 +2105,32 @@ static void rtl8139_weird_interrupt (struct net_device *dev,
 	}
 }
 
-static int rtl8139_poll(struct net_device *dev, int *budget)
+static int rtl8139_poll(struct napi_struct *napi, int budget)
 {
-	struct rtl8139_private *tp = netdev_priv(dev);
+	struct rtl8139_private *tp = container_of(napi, struct rtl8139_private, napi);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->mmio_addr;
-	int orig_budget = min(*budget, dev->quota);
-	int done = 1;
+	int work_done;
 
 	spin_lock(&tp->rx_lock);
-	if (likely(RTL_R16(IntrStatus) & RxAckBits)) {
-		int work_done;
-
-		work_done = rtl8139_rx(dev, tp, orig_budget);
-		if (likely(work_done > 0)) {
-			*budget -= work_done;
-			dev->quota -= work_done;
-			done = (work_done < orig_budget);
-		}
-	}
+	work_done = 0;
+	if (likely(RTL_R16(IntrStatus) & RxAckBits))
+		work_done += rtl8139_rx(dev, tp, budget);
 
-	if (done) {
+	if (work_done < budget) {
 		unsigned long flags;
 		/*
 		 * Order is important since data can get interrupted
 		 * again when we think we are done.
 		 */
-		local_irq_save(flags);
+		spin_lock_irqsave(&tp->lock, flags);
 		RTL_W16_F(IntrMask, rtl8139_intr_mask);
-		__netif_rx_complete(dev);
-		local_irq_restore(flags);
+		__netif_rx_complete(dev, napi);
+		spin_unlock_irqrestore(&tp->lock, flags);
 	}
 	spin_unlock(&tp->rx_lock);
 
-	return !done;
+	return work_done;
 }
 
 /* The interrupt handler does all of the Rx thread work and cleans up
@@ -2180,9 +2175,9 @@ static irqreturn_t rtl8139_interrupt (int irq, void *dev_instance)
 	/* Receive packets are processed by poll routine.
 	   If not running start it now. */
 	if (status & RxAckBits){
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &tp->napi)) {
 			RTL_W16_F (IntrMask, rtl8139_norx_intr_mask);
-			__netif_rx_schedule (dev);
+			__netif_rx_schedule(dev, &tp->napi);
 		}
 	}
 
@@ -2223,7 +2218,8 @@ static int rtl8139_close (struct net_device *dev)
 	void __iomem *ioaddr = tp->mmio_addr;
 	unsigned long flags;
 
-	netif_stop_queue (dev);
+	netif_stop_queue(dev);
+	napi_disable(&tp->napi);
 
 	if (netif_msg_ifdown(tp))
 		printk(KERN_DEBUG "%s: Shutting down ethercard, status was 0x%4.4x.\n",
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index a61b2f8..2f7af7c 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -723,9 +723,10 @@ static int amd8111e_tx(struct net_device *dev)
 
 #ifdef CONFIG_AMD8111E_NAPI
 /* This function handles the driver receive operation in polling mode */
-static int amd8111e_rx_poll(struct net_device *dev, int * budget)
+static int amd8111e_rx_poll(struct napi_struct *napi, int budget)
 {
-	struct amd8111e_priv *lp = netdev_priv(dev);
+	struct amd8111e_priv *lp = container_of(napi, struct amd8111e_priv, napi);
+	struct net_device *dev = lp->amd8111e_net_dev;
 	int rx_index = lp->rx_idx & RX_RING_DR_MOD_MASK;
 	void __iomem *mmio = lp->mmio;
 	struct sk_buff *skb,*new_skb;
@@ -737,7 +738,7 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
 #if AMD8111E_VLAN_TAG_USED
 	short vtag;
 #endif
-	int rx_pkt_limit = dev->quota;
+	int rx_pkt_limit = budget;
 	unsigned long flags;
 
 	do{
@@ -838,21 +839,14 @@ static int amd8111e_rx_poll(struct net_device *dev, int * budget)
 	} while(intr0 & RINT0);
 
 	/* Receive descriptor is empty now */
-	dev->quota -= num_rx_pkt;
-	*budget -= num_rx_pkt;
-
 	spin_lock_irqsave(&lp->lock, flags);
-	netif_rx_complete(dev);
+	__netif_rx_complete(dev, napi);
 	writel(VAL0|RINTEN0, mmio + INTEN0);
 	writel(VAL2 | RDMD0, mmio + CMD0);
 	spin_unlock_irqrestore(&lp->lock, flags);
-	return 0;
 
 rx_not_empty:
-	/* Do not call a netif_rx_complete */
-	dev->quota -= num_rx_pkt;
-	*budget -= num_rx_pkt;
-	return 1;
+	return num_rx_pkt;
 }
 
 #else
@@ -1287,11 +1281,11 @@ static irqreturn_t amd8111e_interrupt(int irq, void *dev_id)
 	/* Check if Receive Interrupt has occurred. */
 #ifdef CONFIG_AMD8111E_NAPI
 	if(intr0 & RINT0){
-		if(netif_rx_schedule_prep(dev)){
+		if(netif_rx_schedule_prep(dev, &lp->napi)){
 			/* Disable receive interupts */
 			writel(RINTEN0, mmio + INTEN0);
 			/* Schedule a polling routine */
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &lp->napi);
 		}
 		else if (intren0 & RINTEN0) {
 			printk("************Driver bug! \
@@ -1345,6 +1339,8 @@ static int amd8111e_close(struct net_device * dev)
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	netif_stop_queue(dev);
 
+	napi_disable(&lp->napi);
+
 	spin_lock_irq(&lp->lock);
 
 	amd8111e_disable_interrupt(lp);
@@ -2031,8 +2027,7 @@ static int __devinit amd8111e_probe_one(struct pci_dev *pdev,
 	dev->tx_timeout = amd8111e_tx_timeout;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
 #ifdef CONFIG_AMD8111E_NAPI
-	dev->poll = amd8111e_rx_poll;
-	dev->weight = 32;
+	netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = amd8111e_poll;
diff --git a/drivers/net/amd8111e.h b/drivers/net/amd8111e.h
index e65080a..612e653 100644
--- a/drivers/net/amd8111e.h
+++ b/drivers/net/amd8111e.h
@@ -763,6 +763,8 @@ struct amd8111e_priv{
 	/* Reg memory mapped address */
 	void __iomem *mmio;
 
+	struct napi_struct napi;
+
 	spinlock_t lock;	/* Guard lock */
 	unsigned long rx_idx, tx_idx;	/* The next free ring entry */
 	unsigned long tx_complete_idx;
diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index f6ece1d..1d8737f 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -169,6 +169,9 @@ struct ep93xx_priv
 	spinlock_t		tx_pending_lock;
 	unsigned int		tx_pending;
 
+	struct net_device	*dev;
+	struct napi_struct	napi;
+
 	struct net_device_stats	stats;
 
 	struct mii_if_info	mii;
@@ -190,15 +193,11 @@ static struct net_device_stats *ep93xx_get_stats(struct net_device *dev)
 	return &(ep->stats);
 }
 
-static int ep93xx_rx(struct net_device *dev, int *budget)
+static int ep93xx_rx(struct net_device *dev, int processed, int budget)
 {
 	struct ep93xx_priv *ep = netdev_priv(dev);
-	int rx_done;
-	int processed;
 
-	rx_done = 0;
-	processed = 0;
-	while (*budget > 0) {
+	while (processed < budget) {
 		int entry;
 		struct ep93xx_rstat *rstat;
 		u32 rstat0;
@@ -211,10 +210,8 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
 
 		rstat0 = rstat->rstat0;
 		rstat1 = rstat->rstat1;
-		if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP)) {
-			rx_done = 1;
+		if (!(rstat0 & RSTAT0_RFP) || !(rstat1 & RSTAT1_RFP))
 			break;
-		}
 
 		rstat->rstat0 = 0;
 		rstat->rstat1 = 0;
@@ -275,8 +272,6 @@ static int ep93xx_rx(struct net_device *dev, int *budget)
 err:
 		ep->rx_pointer = (entry + 1) & (RX_QUEUE_ENTRIES - 1);
 		processed++;
-		dev->quota--;
-		(*budget)--;
 	}
 
 	if (processed) {
@@ -284,7 +279,7 @@ err:
 		wrw(ep, REG_RXSTSENQ, processed);
 	}
 
-	return !rx_done;
+	return processed;
 }
 
 static int ep93xx_have_more_rx(struct ep93xx_priv *ep)
@@ -293,36 +288,32 @@ static int ep93xx_have_more_rx(struct ep93xx_priv *ep)
 	return !!((rstat->rstat0 & RSTAT0_RFP) && (rstat->rstat1 & RSTAT1_RFP));
 }
 
-static int ep93xx_poll(struct net_device *dev, int *budget)
+static int ep93xx_poll(struct napi_struct *napi, int budget)
 {
-	struct ep93xx_priv *ep = netdev_priv(dev);
-
-	/*
-	 * @@@ Have to stop polling if device is downed while we
-	 * are polling.
-	 */
+	struct ep93xx_priv *ep = container_of(napi, struct ep93xx_priv, napi);
+	struct net_device *dev = ep->dev;
+	int rx = 0;
 
 poll_some_more:
-	if (ep93xx_rx(dev, budget))
-		return 1;
-
-	netif_rx_complete(dev);
-
-	spin_lock_irq(&ep->rx_lock);
-	wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX);
-	if (ep93xx_have_more_rx(ep)) {
-		wrl(ep, REG_INTEN, REG_INTEN_TX);
-		wrl(ep, REG_INTSTSP, REG_INTSTS_RX);
+	rx = ep93xx_rx(dev, rx, budget);
+	if (rx < budget) {
+		int more = 0;
+
+		spin_lock_irq(&ep->rx_lock);
+		__netif_rx_complete(dev, napi);
+		wrl(ep, REG_INTEN, REG_INTEN_TX | REG_INTEN_RX);
+		if (ep93xx_have_more_rx(ep)) {
+			wrl(ep, REG_INTEN, REG_INTEN_TX);
+			wrl(ep, REG_INTSTSP, REG_INTSTS_RX);
+			more = 1;
+		}
 		spin_unlock_irq(&ep->rx_lock);
 
-		if (netif_rx_reschedule(dev, 0))
+		if (more && netif_rx_reschedule(napi))
 			goto poll_some_more;
-
-		return 0;
 	}
-	spin_unlock_irq(&ep->rx_lock);
 
-	return 0;
+	return rx;
 }
 
 static int ep93xx_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -426,9 +417,9 @@ static irqreturn_t ep93xx_irq(int irq, void *dev_id)
 
 	if (status & REG_INTSTS_RX) {
 		spin_lock(&ep->rx_lock);
-		if (likely(__netif_rx_schedule_prep(dev))) {
+		if (likely(__netif_rx_schedule_prep(dev, &ep->napi))) {
 			wrl(ep, REG_INTEN, REG_INTEN_TX);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &ep->napi);
 		}
 		spin_unlock(&ep->rx_lock);
 	}
@@ -788,14 +779,12 @@ struct net_device *ep93xx_dev_alloc(struct ep93xx_eth_data *data)
 
 	dev->get_stats = ep93xx_get_stats;
 	dev->ethtool_ops = &ep93xx_ethtool_ops;
-	dev->poll = ep93xx_poll;
 	dev->hard_start_xmit = ep93xx_xmit;
 	dev->open = ep93xx_open;
 	dev->stop = ep93xx_close;
 	dev->do_ioctl = ep93xx_ioctl;
 
 	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
-	dev->weight = 64;
 
 	return dev;
 }
@@ -847,6 +836,8 @@ static int ep93xx_eth_probe(struct platform_device *pdev)
 		goto err_out;
 	}
 	ep = netdev_priv(dev);
+	ep->dev = dev;
+	netif_napi_add(dev, &ep->napi, ep93xx_poll, 64);
 
 	platform_set_drvdata(pdev, dev);
 
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 0795df2..60b3d56 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -848,10 +848,11 @@ static int b44_rx(struct b44 *bp, int budget)
 	return received;
 }
 
-static int b44_poll(struct net_device *netdev, int *budget)
+static int b44_poll(struct napi_struct *napi, int budget)
 {
-	struct b44 *bp = netdev_priv(netdev);
-	int done;
+	struct b44 *bp = container_of(napi, struct b44, napi);
+	struct net_device *netdev = bp->dev;
+	int work_done;
 
 	spin_lock_irq(&bp->lock);
 
@@ -862,22 +863,9 @@ static int b44_poll(struct net_device *netdev, int *budget)
 	}
 	spin_unlock_irq(&bp->lock);
 
-	done = 1;
-	if (bp->istat & ISTAT_RX) {
-		int orig_budget = *budget;
-		int work_done;
-
-		if (orig_budget > netdev->quota)
-			orig_budget = netdev->quota;
-
-		work_done = b44_rx(bp, orig_budget);
-
-		*budget -= work_done;
-		netdev->quota -= work_done;
-
-		if (work_done >= orig_budget)
-			done = 0;
-	}
+	work_done = 0;
+	if (bp->istat & ISTAT_RX)
+		work_done += b44_rx(bp, budget);
 
 	if (bp->istat & ISTAT_ERRORS) {
 		unsigned long flags;
@@ -888,15 +876,15 @@ static int b44_poll(struct net_device *netdev, int *budget)
 		b44_init_hw(bp, B44_FULL_RESET_SKIP_PHY);
 		netif_wake_queue(bp->dev);
 		spin_unlock_irqrestore(&bp->lock, flags);
-		done = 1;
+		work_done = 0;
 	}
 
-	if (done) {
-		netif_rx_complete(netdev);
+	if (work_done < budget) {
+		netif_rx_complete(netdev, napi);
 		b44_enable_ints(bp);
 	}
 
-	return (done ? 0 : 1);
+	return work_done;
 }
 
 static irqreturn_t b44_interrupt(int irq, void *dev_id)
@@ -924,13 +912,13 @@ static irqreturn_t b44_interrupt(int irq, void *dev_id)
 			goto irq_ack;
 		}
 
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &bp->napi)) {
 			/* NOTE: These writes are posted by the readback of
 			 *       the ISTAT register below.
 			 */
 			bp->istat = istat;
 			__b44_disable_ints(bp);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &bp->napi);
 		} else {
 			printk(KERN_ERR PFX "%s: Error, poll already scheduled\n",
 			       dev->name);
@@ -1609,7 +1597,7 @@ static int b44_close(struct net_device *dev)
 
 	netif_stop_queue(dev);
 
-	netif_poll_disable(dev);
+	napi_disable(&bp->napi);
 
 	del_timer_sync(&bp->timer);
 
@@ -1626,7 +1614,7 @@ static int b44_close(struct net_device *dev)
 
 	free_irq(dev->irq, dev);
 
-	netif_poll_enable(dev);
+	napi_enable(&bp->napi);
 
 	if (bp->flags & B44_FLAG_WOL_ENABLE) {
 		b44_init_hw(bp, B44_PARTIAL_RESET);
@@ -2194,8 +2182,7 @@ static int __devinit b44_init_one(struct pci_dev *pdev,
 	dev->set_mac_address = b44_set_mac_addr;
 	dev->do_ioctl = b44_ioctl;
 	dev->tx_timeout = b44_tx_timeout;
-	dev->poll = b44_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &bp->napi, b44_poll, 64);
 	dev->watchdog_timeo = B44_TX_TIMEOUT;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = b44_poll_controller;
diff --git a/drivers/net/b44.h b/drivers/net/b44.h
index e537e63..63c55a4 100644
--- a/drivers/net/b44.h
+++ b/drivers/net/b44.h
@@ -423,6 +423,8 @@ struct b44 {
 	struct ring_info	*rx_buffers;
 	struct ring_info	*tx_buffers;
 
+	struct napi_struct	napi;
+
 	u32			dma_offset;
 	u32			flags;
 #define B44_FLAG_B0_ANDLATER	0x00000001
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 24e7f9a..3fcff65 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -428,7 +428,7 @@ bnx2_netif_stop(struct bnx2 *bp)
 {
 	bnx2_disable_int_sync(bp);
 	if (netif_running(bp->dev)) {
-		netif_poll_disable(bp->dev);
+		napi_disable(&bp->napi);
 		netif_tx_disable(bp->dev);
 		bp->dev->trans_start = jiffies;	/* prevent tx timeout */
 	}
@@ -440,7 +440,7 @@ bnx2_netif_start(struct bnx2 *bp)
 	if (atomic_dec_and_test(&bp->intr_sem)) {
 		if (netif_running(bp->dev)) {
 			netif_wake_queue(bp->dev);
-			netif_poll_enable(bp->dev);
+			napi_enable(&bp->napi);
 			bnx2_enable_int(bp);
 		}
 	}
@@ -2551,7 +2551,7 @@ bnx2_msi(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(dev);
+	netif_rx_schedule(dev, &bp->napi);
 
 	return IRQ_HANDLED;
 }
@@ -2568,7 +2568,7 @@ bnx2_msi_1shot(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	netif_rx_schedule(dev);
+	netif_rx_schedule(dev, &bp->napi);
 
 	return IRQ_HANDLED;
 }
@@ -2604,9 +2604,9 @@ bnx2_interrupt(int irq, void *dev_instance)
 	if (unlikely(atomic_read(&bp->intr_sem) != 0))
 		return IRQ_HANDLED;
 
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &bp->napi)) {
 		bp->last_status_idx = sblk->status_idx;
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &bp->napi);
 	}
 
 	return IRQ_HANDLED;
@@ -2632,12 +2632,14 @@ bnx2_has_work(struct bnx2 *bp)
 }
 
 static int
-bnx2_poll(struct net_device *dev, int *budget)
+bnx2_poll(struct napi_struct *napi, int budget)
 {
-	struct bnx2 *bp = netdev_priv(dev);
+	struct bnx2 *bp = container_of(napi, struct bnx2, napi);
+	struct net_device *dev = bp->dev;
 	struct status_block *sblk = bp->status_blk;
 	u32 status_attn_bits = sblk->status_attn_bits;
 	u32 status_attn_bits_ack = sblk->status_attn_bits_ack;
+	int work_done = 0;
 
 	if ((status_attn_bits & STATUS_ATTN_EVENTS) !=
 	    (status_attn_bits_ack & STATUS_ATTN_EVENTS)) {
@@ -2655,23 +2657,14 @@ bnx2_poll(struct net_device *dev, int *budget)
 	if (bp->status_blk->status_tx_quick_consumer_index0 != bp->hw_tx_cons)
 		bnx2_tx_int(bp);
 
-	if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons) {
-		int orig_budget = *budget;
-		int work_done;
-
-		if (orig_budget > dev->quota)
-			orig_budget = dev->quota;
-
-		work_done = bnx2_rx_int(bp, orig_budget);
-		*budget -= work_done;
-		dev->quota -= work_done;
-	}
+	if (bp->status_blk->status_rx_quick_consumer_index0 != bp->hw_rx_cons)
+		work_done = bnx2_rx_int(bp, budget);
 
 	bp->last_status_idx = bp->status_blk->status_idx;
 	rmb();
 
 	if (!bnx2_has_work(bp)) {
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		if (likely(bp->flags & USING_MSI_FLAG)) {
 			REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
 			       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
@@ -2686,10 +2679,9 @@ bnx2_poll(struct net_device *dev, int *budget)
 		REG_WR(bp, BNX2_PCICFG_INT_ACK_CMD,
 		       BNX2_PCICFG_INT_ACK_CMD_INDEX_VALID |
 		       bp->last_status_idx);
-		return 0;
 	}
 
-	return 1;
+	return work_done;
 }
 
 /* Called with rtnl_lock from vlan functions and also netif_tx_lock
@@ -5292,6 +5284,8 @@ bnx2_close(struct net_device *dev)
 	struct bnx2 *bp = netdev_priv(dev);
 	u32 reset_code;
 
+	napi_disable(&bp->napi);
+
 	/* Calling flush_scheduled_work() may deadlock because
 	 * linkwatch_event() may be on the workqueue and it will try to get
 	 * the rtnl_lock which we are holding.
@@ -6855,11 +6849,10 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 #ifdef BCM_VLAN
 	dev->vlan_rx_register = bnx2_vlan_rx_register;
 #endif
-	dev->poll = bnx2_poll;
 	dev->ethtool_ops = &bnx2_ethtool_ops;
-	dev->weight = 64;
 
 	bp = netdev_priv(dev);
+	netif_napi_add(dev, &bp->napi, bnx2_poll, 64);
 
 #if defined(HAVE_POLL_CONTROLLER) || defined(CONFIG_NET_POLL_CONTROLLER)
 	dev->poll_controller = poll_bnx2;
diff --git a/drivers/net/bnx2.h b/drivers/net/bnx2.h
index 102adfe..fbae439 100644
--- a/drivers/net/bnx2.h
+++ b/drivers/net/bnx2.h
@@ -6473,6 +6473,8 @@ struct bnx2 {
 	struct net_device	*dev;
 	struct pci_dev		*pdev;
 
+	struct napi_struct	napi;
+
 	atomic_t		intr_sem;
 
 	struct status_block	*status_blk;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index f6e4030..ffcefa2 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2485,7 +2485,7 @@ static irqreturn_t cas_interruptN(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev);
+		netif_rx_schedule(dev, &cp->napi);
 #else
 		cas_rx_ringN(cp, ring, 0);
 #endif
@@ -2536,7 +2536,7 @@ static irqreturn_t cas_interrupt1(int irq, void *dev_id)
 	if (status & INTR_RX_DONE_ALT) { /* handle rx separately */
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev);
+		netif_rx_schedule(dev, &cp->napi);
 #else
 		cas_rx_ringN(cp, 1, 0);
 #endif
@@ -2592,7 +2592,7 @@ static irqreturn_t cas_interrupt(int irq, void *dev_id)
 	if (status & INTR_RX_DONE) {
 #ifdef USE_NAPI
 		cas_mask_intr(cp);
-		netif_rx_schedule(dev);
+		netif_rx_schedule(dev, &cp->napi);
 #else
 		cas_rx_ringN(cp, 0, 0);
 #endif
@@ -2607,9 +2607,10 @@ static irqreturn_t cas_interrupt(int irq, void *dev_id)
 
 
 #ifdef USE_NAPI
-static int cas_poll(struct net_device *dev, int *budget)
+static int cas_poll(struct napi_struct *napi, int budget)
 {
-	struct cas *cp = netdev_priv(dev);
+	struct cas *cp = container_of(napi, struct cas, napi);
+	struct net_device *dev = cp->dev;
 	int i, enable_intr, todo, credits;
 	u32 status = readl(cp->regs + REG_INTR_STATUS);
 	unsigned long flags;
@@ -2620,20 +2621,18 @@ static int cas_poll(struct net_device *dev, int *budget)
 
 	/* NAPI rx packets. we spread the credits across all of the
 	 * rxc rings
-	 */
-	todo = min(*budget, dev->quota);
-
-	/* to make sure we're fair with the work we loop through each
+	 *
+	 * to make sure we're fair with the work we loop through each
 	 * ring N_RX_COMP_RING times with a request of
-	 * todo / N_RX_COMP_RINGS
+	 * budget / N_RX_COMP_RINGS
 	 */
 	enable_intr = 1;
 	credits = 0;
 	for (i = 0; i < N_RX_COMP_RINGS; i++) {
 		int j;
 		for (j = 0; j < N_RX_COMP_RINGS; j++) {
-			credits += cas_rx_ringN(cp, j, todo / N_RX_COMP_RINGS);
-			if (credits >= todo) {
+			credits += cas_rx_ringN(cp, j, budget / N_RX_COMP_RINGS);
+			if (credits >= budget) {
 				enable_intr = 0;
 				goto rx_comp;
 			}
@@ -2641,9 +2640,6 @@ static int cas_poll(struct net_device *dev, int *budget)
 	}
 
 rx_comp:
-	*budget    -= credits;
-	dev->quota -= credits;
-
 	/* final rx completion */
 	spin_lock_irqsave(&cp->lock, flags);
 	if (status)
@@ -2674,11 +2670,10 @@ rx_comp:
 #endif
 	spin_unlock_irqrestore(&cp->lock, flags);
 	if (enable_intr) {
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		cas_unmask_intr(cp);
-		return 0;
 	}
-	return 1;
+	return credits;
 }
 #endif
 
@@ -5062,8 +5057,7 @@ static int __devinit cas_init_one(struct pci_dev *pdev,
 	dev->watchdog_timeo = CAS_TX_TIMEOUT;
 	dev->change_mtu = cas_change_mtu;
 #ifdef USE_NAPI
-	dev->poll = cas_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &cp->napi, cas_poll, 64);
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = cas_netpoll;
diff --git a/drivers/net/cassini.h b/drivers/net/cassini.h
index a970804..2f93f83 100644
--- a/drivers/net/cassini.h
+++ b/drivers/net/cassini.h
@@ -4280,6 +4280,8 @@ struct cas {
 	int rx_cur[N_RX_COMP_RINGS], rx_new[N_RX_COMP_RINGS];
 	int rx_last[N_RX_DESC_RINGS];
 
+	struct napi_struct napi;
+
 	/* Set when chip is actually in operational state
 	 * (ie. not power managed) */
 	int hw_running;
diff --git a/drivers/net/chelsio/common.h b/drivers/net/chelsio/common.h
index 8ba702c..b5de445 100644
--- a/drivers/net/chelsio/common.h
+++ b/drivers/net/chelsio/common.h
@@ -278,6 +278,7 @@ struct adapter {
 	struct peespi *espi;
 	struct petp   *tp;
 
+	struct napi_struct napi;
 	struct port_info port[MAX_NPORTS];
 	struct delayed_work stats_update_task;
 	struct timer_list stats_update_timer;
diff --git a/drivers/net/chelsio/cxgb2.c b/drivers/net/chelsio/cxgb2.c
index 231ce43..66da0c7 100644
--- a/drivers/net/chelsio/cxgb2.c
+++ b/drivers/net/chelsio/cxgb2.c
@@ -274,6 +274,7 @@ static int cxgb_close(struct net_device *dev)
 	struct cmac *mac = p->mac;
 
 	netif_stop_queue(dev);
+	napi_disable(&adapter->napi);
 	mac->ops->disable(mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
 	netif_carrier_off(dev);
 
@@ -1113,8 +1114,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 		netdev->poll_controller = t1_netpoll;
 #endif
 #ifdef CONFIG_CHELSIO_T1_NAPI
-		netdev->weight = 64;
-		netdev->poll = t1_poll;
+		netif_napi_add(netdev, &adapter->napi, t1_poll, 64);
 #endif
 
 		SET_ETHTOOL_OPS(netdev, &t1_ethtool_ops);
diff --git a/drivers/net/chelsio/sge.c b/drivers/net/chelsio/sge.c
index e4f874a..0e5504d 100644
--- a/drivers/net/chelsio/sge.c
+++ b/drivers/net/chelsio/sge.c
@@ -1620,23 +1620,20 @@ static int process_pure_responses(struct adapter *adapter)
  * or protection from interrupts as data interrupts are off at this point and
  * other adapter interrupts do not interfere.
  */
-int t1_poll(struct net_device *dev, int *budget)
+int t1_poll(struct napi_struct *napi, int budget)
 {
-	struct adapter *adapter = dev->priv;
+	struct adapter *adapter = container_of(napi, struct adapter, napi);
+	struct net_device *dev = adapter->port[0].dev;
 	int work_done;
 
-	work_done = process_responses(adapter, min(*budget, dev->quota));
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (unlikely(responses_pending(adapter)))
-		return 1;
-
-	netif_rx_complete(dev);
-	writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
-
-	return 0;
+	work_done = process_responses(adapter, budget);
 
+	if (likely(!responses_pending(adapter))) {
+		netif_rx_complete(dev, napi);
+		writel(adapter->sge->respQ.cidx,
+		       adapter->regs + A_SG_SLEEPING);
+	}
+	return work_done;
 }
 
 /*
@@ -1653,13 +1650,13 @@ irqreturn_t t1_interrupt(int irq, void *data)
 
 		writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
 
-		if (__netif_rx_schedule_prep(dev)) {
+		if (napi_schedule_prep(&adapter->napi)) {
 			if (process_pure_responses(adapter))
-				__netif_rx_schedule(dev);
+				__netif_rx_schedule(dev, &adapter->napi);
 			else {
 				/* no data, no NAPI needed */
 				writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
-				netif_poll_enable(dev);	/* undo schedule_prep */
+				napi_enable(&adapter->napi);	/* undo schedule_prep */
 			}
 		}
 		return IRQ_HANDLED;
diff --git a/drivers/net/chelsio/sge.h b/drivers/net/chelsio/sge.h
index d132a0e..713d9c5 100644
--- a/drivers/net/chelsio/sge.h
+++ b/drivers/net/chelsio/sge.h
@@ -77,7 +77,7 @@ int t1_sge_configure(struct sge *, struct sge_params *);
 int t1_sge_set_coalesce_params(struct sge *, struct sge_params *);
 void t1_sge_destroy(struct sge *);
 irqreturn_t t1_interrupt(int irq, void *cookie);
-int t1_poll(struct net_device *, int *);
+int t1_poll(struct napi_struct *, int);
 
 int t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
 void t1_set_vlan_accel(struct adapter *adapter, int on_off);
diff --git a/drivers/net/cxgb3/adapter.h b/drivers/net/cxgb3/adapter.h
index ab72563..e723e7b 100644
--- a/drivers/net/cxgb3/adapter.h
+++ b/drivers/net/cxgb3/adapter.h
@@ -50,7 +50,9 @@ typedef irqreturn_t(*intr_handler_t) (int, void *);
 
 struct vlan_group;
 
+struct adapter;
 struct port_info {
+	struct adapter *adapter;
 	struct vlan_group *vlan_grp;
 	const struct port_type_info *port_type;
 	u8 port_id;
@@ -170,11 +172,13 @@ enum {				/* per port SGE statistics */
 	SGE_PSTAT_MAX		/* must be last */
 };
 
+struct adapter_napi;
 struct sge_qset {		/* an SGE queue set */
 	struct sge_rspq rspq;
 	struct sge_fl fl[SGE_RXQ_PER_SET];
 	struct sge_txq txq[SGE_TXQ_PER_SET];
-	struct net_device *netdev;	/* associated net device */
+	struct adapter_napi *anapi;
+	struct net_device *netdev;
 	unsigned long txq_stopped;	/* which Tx queues are stopped */
 	struct timer_list tx_reclaim_timer;	/* reclaims TX buffers */
 	unsigned long port_stats[SGE_PSTAT_MAX];
@@ -185,6 +189,13 @@ struct sge {
 	spinlock_t reg_lock;	/* guards non-atomic SGE registers (eg context) */
 };
 
+struct adapter_napi {
+	struct napi_struct napi;
+	struct adapter *adapter;
+	int port;
+	int qset;
+};
+
 struct adapter {
 	struct t3cdev tdev;
 	struct list_head adapter_list;
@@ -219,11 +230,7 @@ struct adapter {
 	struct delayed_work adap_check_task;
 	struct work_struct ext_intr_handler_task;
 
-	/*
-	 * Dummy netdevices are needed when using multiple receive queues with
-	 * NAPI as each netdevice can service only one queue.
-	 */
-	struct net_device *dummy_netdev[SGE_QSETS - 1];
+	struct adapter_napi napi[SGE_QSETS];
 
 	struct dentry *debugfs_root;
 
@@ -251,12 +258,6 @@ static inline struct port_info *adap2pinfo(struct adapter *adap, int idx)
 	return netdev_priv(adap->port[idx]);
 }
 
-/*
- * We use the spare atalk_ptr to map a net device to its SGE queue set.
- * This is a macro so it can be used as l-value.
- */
-#define dev2qset(netdev) ((netdev)->atalk_ptr)
-
 #define OFFLOAD_DEVMAP_BIT 15
 
 #define tdev2adap(d) container_of(d, struct adapter, tdev)
@@ -282,7 +283,8 @@ int t3_mgmt_tx(struct adapter *adap, struct sk_buff *skb);
 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p);
 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 		      int irq_vec_idx, const struct qset_params *p,
-		      int ntxq, struct net_device *netdev);
+		      int ntxq, struct adapter_napi *anapi,
+		      struct net_device *dev);
 int t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
 		unsigned char *data);
 irqreturn_t t3_sge_intr_msix(int irq, void *cookie);
diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
index dc5d269..b59fd61 100644
--- a/drivers/net/cxgb3/cxgb3_main.c
+++ b/drivers/net/cxgb3/cxgb3_main.c
@@ -339,46 +339,24 @@ static void setup_rss(struct adapter *adap)
 		      V_RRCPLCPUSIZE(6), cpus, rspq_map);
 }
 
-/*
- * If we have multiple receive queues per port serviced by NAPI we need one
- * netdevice per queue as NAPI operates on netdevices.  We already have one
- * netdevice, namely the one associated with the interface, so we use dummy
- * ones for any additional queues.  Note that these netdevices exist purely
- * so that NAPI has something to work with, they do not represent network
- * ports and are not registered.
- */
-static int init_dummy_netdevs(struct adapter *adap)
+static int init_napi(struct adapter *adap)
 {
-	int i, j, dummy_idx = 0;
-	struct net_device *nd;
+	int i, j, napi_idx = 0;
 
 	for_each_port(adap, i) {
 		struct net_device *dev = adap->port[i];
 		const struct port_info *pi = netdev_priv(dev);
 
 		for (j = 0; j < pi->nqsets - 1; j++) {
-			if (!adap->dummy_netdev[dummy_idx]) {
-				nd = alloc_netdev(0, "", ether_setup);
-				if (!nd)
-					goto free_all;
-
-				nd->priv = adap;
-				nd->weight = 64;
-				set_bit(__LINK_STATE_START, &nd->state);
-				adap->dummy_netdev[dummy_idx] = nd;
-			}
-			strcpy(adap->dummy_netdev[dummy_idx]->name, dev->name);
-			dummy_idx++;
+			netif_napi_add(dev, &adap->napi[napi_idx].napi,
+				       NULL, 64);
+			adap->napi[napi_idx].adapter = adap;
+			adap->napi[napi_idx].port = i;
+			adap->napi[napi_idx].qset = j;
+			napi_idx++;
 		}
 	}
 	return 0;
-
-free_all:
-	while (--dummy_idx >= 0) {
-		free_netdev(adap->dummy_netdev[dummy_idx]);
-		adap->dummy_netdev[dummy_idx] = NULL;
-	}
-	return -ENOMEM;
 }
 
 /*
@@ -389,19 +367,10 @@ free_all:
 static void quiesce_rx(struct adapter *adap)
 {
 	int i;
-	struct net_device *dev;
 
-	for_each_port(adap, i) {
-		dev = adap->port[i];
-		while (test_bit(__LINK_STATE_RX_SCHED, &dev->state))
-			msleep(1);
-	}
-
-	for (i = 0; i < ARRAY_SIZE(adap->dummy_netdev); i++) {
-		dev = adap->dummy_netdev[i];
-		if (dev)
-			while (test_bit(__LINK_STATE_RX_SCHED, &dev->state))
-				msleep(1);
+	for (i = 0; i < ARRAY_SIZE(adap->napi); i++) {
+		struct adapter_napi *anapi = &adap->napi[i];
+		napi_disable(&anapi->napi);
 	}
 }
 
@@ -415,7 +384,7 @@ static void quiesce_rx(struct adapter *adap)
  */
 static int setup_sge_qsets(struct adapter *adap)
 {
-	int i, j, err, irq_idx = 0, qset_idx = 0, dummy_dev_idx = 0;
+	int i, j, err, irq_idx = 0, qset_idx = 0, anapi_idx = 0;
 	unsigned int ntxq = SGE_TXQ_PER_SET;
 
 	if (adap->params.rev > 0 && !(adap->flags & USING_MSI))
@@ -426,12 +395,15 @@ static int setup_sge_qsets(struct adapter *adap)
 		const struct port_info *pi = netdev_priv(dev);
 
 		for (j = 0; j < pi->nqsets; ++j, ++qset_idx) {
+			struct adapter_napi *anapi;
+
+			anapi = &adap->napi[anapi_idx++];
+
 			err = t3_sge_alloc_qset(adap, qset_idx, 1,
 				(adap->flags & USING_MSIX) ? qset_idx + 1 :
 							     irq_idx,
 				&adap->params.sge.qset[qset_idx], ntxq,
-				j == 0 ? dev :
-					 adap-> dummy_netdev[dummy_dev_idx++]);
+				anapi, dev);
 			if (err) {
 				t3_free_sge_resources(adap);
 				return err;
@@ -482,7 +454,8 @@ static ssize_t attr_store(struct device *d, struct device_attribute *attr,
 #define CXGB3_SHOW(name, val_expr) \
 static ssize_t format_##name(struct net_device *dev, char *buf) \
 { \
-	struct adapter *adap = dev->priv; \
+	struct port_info *pi = netdev_priv(dev); \
+	struct adapter *adap = pi->adapter; \
 	return sprintf(buf, "%u\n", val_expr); \
 } \
 static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
@@ -493,7 +466,8 @@ static ssize_t show_##name(struct device *d, struct device_attribute *attr, \
 
 static ssize_t set_nfilters(struct net_device *dev, unsigned int val)
 {
-	struct adapter *adap = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
 	int min_tids = is_offload(adap) ? MC5_MIN_TIDS : 0;
 
 	if (adap->flags & FULL_INIT_DONE)
@@ -515,7 +489,8 @@ static ssize_t store_nfilters(struct device *d, struct device_attribute *attr,
 
 static ssize_t set_nservers(struct net_device *dev, unsigned int val)
 {
-	struct adapter *adap = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adap = pi->adapter;
 
 	if (adap->flags & FULL_INIT_DONE)
 		return -EBUSY;
@@ -556,9 +531,10 @@ static struct attribute_group cxgb3_attr_group = {.attrs = cxgb3_attrs };
 static ssize_t tm_attr_show(struct device *d, struct device_attribute *attr,
 			    char *buf, int sched)
 {
-	ssize_t len;
+	struct port_info *pi = netdev_priv(to_net_dev(d));
+	struct adapter *adap = pi->adapter;
 	unsigned int v, addr, bpt, cpt;
-	struct adapter *adap = to_net_dev(d)->priv;
+	ssize_t len;
 
 	addr = A_TP_TX_MOD_Q1_Q0_RATE_LIMIT - sched / 2;
 	rtnl_lock();
@@ -581,10 +557,11 @@ static ssize_t tm_attr_show(struct device *d, struct device_attribute *attr,
 static ssize_t tm_attr_store(struct device *d, struct device_attribute *attr,
 			     const char *buf, size_t len, int sched)
 {
+	struct port_info *pi = netdev_priv(to_net_dev(d));
+	struct adapter *adap = pi->adapter;
+	unsigned int val;
 	char *endp;
 	ssize_t ret;
-	unsigned int val;
-	struct adapter *adap = to_net_dev(d)->priv;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
@@ -763,7 +740,7 @@ static int cxgb_up(struct adapter *adap)
 		if (err)
 			goto out;
 
-		err = init_dummy_netdevs(adap);
+		err = init_napi(adap);
 		if (err)
 			goto out;
 
@@ -858,7 +835,8 @@ static void schedule_chk_task(struct adapter *adap)
 
 static int offload_open(struct net_device *dev)
 {
-	struct adapter *adapter = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	struct t3cdev *tdev = T3CDEV(dev);
 	int adap_up = adapter->open_device_map & PORT_MASK;
 	int err = 0;
@@ -924,10 +902,10 @@ static int offload_close(struct t3cdev *tdev)
 
 static int cxgb_open(struct net_device *dev)
 {
-	int err;
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	int other_ports = adapter->open_device_map & PORT_MASK;
+	int err;
 
 	if (!adapter->open_device_map && (err = cxgb_up(adapter)) < 0)
 		return err;
@@ -951,17 +929,17 @@ static int cxgb_open(struct net_device *dev)
 
 static int cxgb_close(struct net_device *dev)
 {
-	struct adapter *adapter = dev->priv;
-	struct port_info *p = netdev_priv(dev);
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 
-	t3_port_intr_disable(adapter, p->port_id);
+	t3_port_intr_disable(adapter, pi->port_id);
 	netif_stop_queue(dev);
-	p->phy.ops->power_down(&p->phy, 1);
+	pi->phy.ops->power_down(&pi->phy, 1);
 	netif_carrier_off(dev);
-	t3_mac_disable(&p->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
+	t3_mac_disable(&pi->mac, MAC_DIRECTION_TX | MAC_DIRECTION_RX);
 
 	spin_lock(&adapter->work_lock);	/* sync with update task */
-	clear_bit(p->port_id, &adapter->open_device_map);
+	clear_bit(pi->port_id, &adapter->open_device_map);
 	spin_unlock(&adapter->work_lock);
 
 	if (!(adapter->open_device_map & PORT_MASK))
@@ -976,13 +954,13 @@ static int cxgb_close(struct net_device *dev)
 
 static struct net_device_stats *cxgb_get_stats(struct net_device *dev)
 {
-	struct adapter *adapter = dev->priv;
-	struct port_info *p = netdev_priv(dev);
-	struct net_device_stats *ns = &p->netstats;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
+	struct net_device_stats *ns = &pi->netstats;
 	const struct mac_stats *pstats;
 
 	spin_lock(&adapter->stats_lock);
-	pstats = t3_mac_update_stats(&p->mac);
+	pstats = t3_mac_update_stats(&pi->mac);
 	spin_unlock(&adapter->stats_lock);
 
 	ns->tx_bytes = pstats->tx_octets;
@@ -1015,14 +993,16 @@ static struct net_device_stats *cxgb_get_stats(struct net_device *dev)
 
 static u32 get_msglevel(struct net_device *dev)
 {
-	struct adapter *adapter = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 
 	return adapter->msg_enable;
 }
 
 static void set_msglevel(struct net_device *dev, u32 val)
 {
-	struct adapter *adapter = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 
 	adapter->msg_enable = val;
 }
@@ -1096,8 +1076,9 @@ static int get_eeprom_len(struct net_device *dev)
 
 static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 {
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	u32 fw_vers = 0;
-	struct adapter *adapter = dev->priv;
 
 	t3_get_fw_version(adapter, &fw_vers);
 
@@ -1136,8 +1117,8 @@ static unsigned long collect_sge_port_stats(struct adapter *adapter,
 static void get_stats(struct net_device *dev, struct ethtool_stats *stats,
 		      u64 *data)
 {
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	const struct mac_stats *s;
 
 	spin_lock(&adapter->stats_lock);
@@ -1205,7 +1186,8 @@ static inline void reg_block_dump(struct adapter *ap, void *buf,
 static void get_regs(struct net_device *dev, struct ethtool_regs *regs,
 		     void *buf)
 {
-	struct adapter *ap = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *ap = pi->adapter;
 
 	/*
 	 * Version scheme:
@@ -1246,8 +1228,9 @@ static int restart_autoneg(struct net_device *dev)
 
 static int cxgb3_phys_id(struct net_device *dev, u32 data)
 {
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	int i;
-	struct adapter *adapter = dev->priv;
 
 	if (data == 0)
 		data = 2;
@@ -1408,8 +1391,8 @@ static int set_rx_csum(struct net_device *dev, u32 data)
 
 static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
 {
-	const struct adapter *adapter = dev->priv;
-	const struct port_info *pi = netdev_priv(dev);
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	const struct qset_params *q = &adapter->params.sge.qset[pi->first_qset];
 
 	e->rx_max_pending = MAX_RX_BUFFERS;
@@ -1425,10 +1408,10 @@ static void get_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
 
 static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
 {
-	int i;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	struct qset_params *q;
-	struct adapter *adapter = dev->priv;
-	const struct port_info *pi = netdev_priv(dev);
+	int i;
 
 	if (e->rx_pending > MAX_RX_BUFFERS ||
 	    e->rx_jumbo_pending > MAX_RX_JUMBO_BUFFERS ||
@@ -1457,7 +1440,8 @@ static int set_sge_param(struct net_device *dev, struct ethtool_ringparam *e)
 
 static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 {
-	struct adapter *adapter = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	struct qset_params *qsp = &adapter->params.sge.qset[0];
 	struct sge_qset *qs = &adapter->sge.qs[0];
 
@@ -1471,7 +1455,8 @@ static int set_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 
 static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 {
-	struct adapter *adapter = dev->priv;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	struct qset_params *q = adapter->params.sge.qset;
 
 	c->rx_coalesce_usecs = q->coalesce_usecs;
@@ -1481,8 +1466,9 @@ static int get_coalesce(struct net_device *dev, struct ethtool_coalesce *c)
 static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
 		      u8 * data)
 {
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	int i, err = 0;
-	struct adapter *adapter = dev->priv;
 
 	u8 *buf = kmalloc(EEPROMSIZE, GFP_KERNEL);
 	if (!buf)
@@ -1501,10 +1487,11 @@ static int get_eeprom(struct net_device *dev, struct ethtool_eeprom *e,
 static int set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom,
 		      u8 * data)
 {
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
+	u32 aligned_offset, aligned_len, *p;
 	u8 *buf;
 	int err = 0;
-	u32 aligned_offset, aligned_len, *p;
-	struct adapter *adapter = dev->priv;
 
 	if (eeprom->magic != EEPROM_MAGIC)
 		return -EINVAL;
@@ -1592,9 +1579,10 @@ static int in_range(int val, int lo, int hi)
 
 static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 {
-	int ret;
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	u32 cmd;
-	struct adapter *adapter = dev->priv;
+	int ret;
 
 	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
 		return -EFAULT;
@@ -1923,10 +1911,10 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
 
 static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 {
-	int ret, mmd;
-	struct adapter *adapter = dev->priv;
-	struct port_info *pi = netdev_priv(dev);
 	struct mii_ioctl_data *data = if_mii(req);
+	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
+	int ret, mmd;
 
 	switch (cmd) {
 	case SIOCGMIIPHY:
@@ -1994,9 +1982,9 @@ static int cxgb_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 
 static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
 {
-	int ret;
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
+	int ret;
 
 	if (new_mtu < 81)	/* accommodate SACK */
 		return -EINVAL;
@@ -2013,8 +2001,8 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
 
 static int cxgb_set_mac_addr(struct net_device *dev, void *p)
 {
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	struct sockaddr *addr = p;
 
 	if (!is_valid_ether_addr(addr->sa_data))
@@ -2050,8 +2038,8 @@ static void t3_synchronize_rx(struct adapter *adap, const struct port_info *p)
 
 static void vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
 {
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 
 	pi->vlan_grp = grp;
 	if (adapter->params.rev > 0)
@@ -2070,8 +2058,8 @@ static void vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void cxgb_netpoll(struct net_device *dev)
 {
-	struct adapter *adapter = dev->priv;
 	struct port_info *pi = netdev_priv(dev);
+	struct adapter *adapter = pi->adapter;
 	int qidx;
 
 	for (qidx = pi->first_qset; qidx < pi->first_qset + pi->nqsets; qidx++) {
@@ -2433,6 +2421,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 
 		adapter->port[i] = netdev;
 		pi = netdev_priv(netdev);
+		pi->adapter = adapter;
 		pi->rx_csum_offload = 1;
 		pi->nqsets = 1;
 		pi->first_qset = i;
@@ -2442,7 +2431,6 @@ static int __devinit init_one(struct pci_dev *pdev,
 		netdev->irq = pdev->irq;
 		netdev->mem_start = mmio_start;
 		netdev->mem_end = mmio_start + mmio_len - 1;
-		netdev->priv = adapter;
 		netdev->features |= NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 		netdev->features |= NETIF_F_LLTX;
 		if (pci_using_dac)
@@ -2462,12 +2450,11 @@ static int __devinit init_one(struct pci_dev *pdev,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 		netdev->poll_controller = cxgb_netpoll;
 #endif
-		netdev->weight = 64;
 
 		SET_ETHTOOL_OPS(netdev, &cxgb_ethtool_ops);
 	}
 
-	pci_set_drvdata(pdev, adapter->port[0]);
+	pci_set_drvdata(pdev, adapter);
 	if (t3_prep_adapter(adapter, ai, 1) < 0) {
 		err = -ENODEV;
 		goto out_free_dev;
@@ -2547,11 +2534,10 @@ out_release_regions:
 
 static void __devexit remove_one(struct pci_dev *pdev)
 {
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct adapter *adapter = pci_get_drvdata(pdev);
 
-	if (dev) {
+	if (adapter) {
 		int i;
-		struct adapter *adapter = dev->priv;
 
 		t3_sge_stop(adapter);
 		sysfs_remove_group(&adapter->port[0]->dev.kobj,
@@ -2571,12 +2557,6 @@ static void __devexit remove_one(struct pci_dev *pdev)
 		t3_free_sge_resources(adapter);
 		cxgb_disable_msi(adapter);
 
-		for (i = 0; i < ARRAY_SIZE(adapter->dummy_netdev); i++)
-			if (adapter->dummy_netdev[i]) {
-				free_netdev(adapter->dummy_netdev[i]);
-				adapter->dummy_netdev[i] = NULL;
-			}
-
 		for_each_port(adapter, i)
 			if (adapter->port[i])
 				free_netdev(adapter->port[i]);
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index a2cfd68..09f231c 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -591,9 +591,6 @@ void t3_free_qset(struct adapter *adapter, struct sge_qset *q)
 				  q->rspq.desc, q->rspq.phys_addr);
 	}
 
-	if (q->netdev)
-		q->netdev->atalk_ptr = NULL;
-
 	memset(q, 0, sizeof(*q));
 }
 
@@ -1073,8 +1070,8 @@ int t3_eth_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	unsigned int ndesc, pidx, credits, gen, compl;
 	const struct port_info *pi = netdev_priv(dev);
-	struct adapter *adap = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
+	struct adapter *adap = pi->adapter;
+	struct sge_qset *qs = &adap->sge.qs[pi->first_qset];
 	struct sge_txq *q = &qs->txq[TXQ_ETH];
 
 	/*
@@ -1326,7 +1323,7 @@ static void restart_ctrlq(unsigned long data)
 	struct sk_buff *skb;
 	struct sge_qset *qs = (struct sge_qset *)data;
 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->anapi->adapter;
 
 	spin_lock(&q->lock);
       again:reclaim_completed_tx_imm(q);
@@ -1531,7 +1528,7 @@ static void restart_offloadq(unsigned long data)
 	struct sk_buff *skb;
 	struct sge_qset *qs = (struct sge_qset *)data;
 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->anapi->adapter;
 
 	spin_lock(&q->lock);
       again:reclaim_completed_tx(adap, q);
@@ -1636,8 +1633,8 @@ static inline void offload_enqueue(struct sge_rspq *q, struct sk_buff *skb)
 	else {
 		struct sge_qset *qs = rspq_to_qset(q);
 
-		if (__netif_rx_schedule_prep(qs->netdev))
-			__netif_rx_schedule(qs->netdev);
+		if (napi_schedule_prep(&qs->anapi->napi))
+			__netif_rx_schedule(qs->netdev, &qs->anapi->napi);
 		q->rx_head = skb;
 	}
 	q->rx_tail = skb;
@@ -1673,33 +1670,32 @@ static inline void deliver_partial_bundle(struct t3cdev *tdev,
  *	receive handler.  Batches need to be of modest size as we do prefetches
  *	on the packets in each.
  */
-static int ofld_poll(struct net_device *dev, int *budget)
+static int ofld_poll(struct napi_struct *napi, int budget)
 {
-	struct adapter *adapter = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
+	struct adapter_napi *anapi = container_of(napi, struct adapter_napi, napi);
+	struct adapter *adapter = anapi->adapter;
+	struct net_device *dev = adapter->port[anapi->port];
+	struct sge_qset *qs = &adapter->sge.qs[anapi->qset];
 	struct sge_rspq *q = &qs->rspq;
-	int work_done, limit = min(*budget, dev->quota), avail = limit;
+	int work_done = 0;
 
-	while (avail) {
+	while (work_done < budget) {
 		struct sk_buff *head, *tail, *skbs[RX_BUNDLE_SIZE];
 		int ngathered;
 
 		spin_lock_irq(&q->lock);
 		head = q->rx_head;
 		if (!head) {
-			work_done = limit - avail;
-			*budget -= work_done;
-			dev->quota -= work_done;
-			__netif_rx_complete(dev);
+			__netif_rx_complete(dev, napi);
 			spin_unlock_irq(&q->lock);
-			return 0;
+			return work_done;
 		}
 
 		tail = q->rx_tail;
 		q->rx_head = q->rx_tail = NULL;
 		spin_unlock_irq(&q->lock);
 
-		for (ngathered = 0; avail && head; avail--) {
+		for (ngathered = 0; work_done < budget && head; work_done++) {
 			prefetch(head->data);
 			skbs[ngathered] = head;
 			head = head->next;
@@ -1721,10 +1717,8 @@ static int ofld_poll(struct net_device *dev, int *budget)
 		}
 		deliver_partial_bundle(&adapter->tdev, q, skbs, ngathered);
 	}
-	work_done = limit - avail;
-	*budget -= work_done;
-	dev->quota -= work_done;
-	return 1;
+
+	return work_done;
 }
 
 /**
@@ -2073,44 +2067,45 @@ static inline int is_pure_response(const struct rsp_desc *r)
  *
  *	Handler for new data events when using NAPI.
  */
-static int napi_rx_handler(struct net_device *dev, int *budget)
+static int napi_rx_handler(struct napi_struct *napi, int budget)
 {
-	struct adapter *adap = dev->priv;
-	struct sge_qset *qs = dev2qset(dev);
-	int effective_budget = min(*budget, dev->quota);
-
+	struct adapter_napi *anapi = container_of(napi, struct adapter_napi, napi);
+	struct adapter *adap = anapi->adapter;
+	struct net_device *dev = adap->port[anapi->port];
+	struct sge_qset *qs = &adap->sge.qs[anapi->qset];
+	int effective_budget = budget;
 	int work_done = process_responses(adap, qs, effective_budget);
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (work_done >= effective_budget)
-		return 1;
 
-	netif_rx_complete(dev);
+	if (likely(work_done < effective_budget)) {
+		netif_rx_complete(dev, napi);
 
-	/*
-	 * Because we don't atomically flush the following write it is
-	 * possible that in very rare cases it can reach the device in a way
-	 * that races with a new response being written plus an error interrupt
-	 * causing the NAPI interrupt handler below to return unhandled status
-	 * to the OS.  To protect against this would require flushing the write
-	 * and doing both the write and the flush with interrupts off.  Way too
-	 * expensive and unjustifiable given the rarity of the race.
-	 *
-	 * The race cannot happen at all with MSI-X.
-	 */
-	t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
-		     V_NEWTIMER(qs->rspq.next_holdoff) |
-		     V_NEWINDEX(qs->rspq.cidx));
-	return 0;
+		/*
+		 * Because we don't atomically flush the following
+		 * write it is possible that in very rare cases it can
+		 * reach the device in a way that races with a new
+		 * response being written plus an error interrupt
+		 * causing the NAPI interrupt handler below to return
+		 * unhandled status to the OS.  To protect against
+		 * this would require flushing the write and doing
+		 * both the write and the flush with interrupts off.
+		 * Way too expensive and unjustifiable given the
+		 * rarity of the race.
+		 *
+		 * The race cannot happen at all with MSI-X.
+		 */
+		t3_write_reg(adap, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
+			     V_NEWTIMER(qs->rspq.next_holdoff) |
+			     V_NEWINDEX(qs->rspq.cidx));
+	}
+	return work_done;
 }
 
 /*
  * Returns true if the device is already scheduled for polling.
  */
-static inline int napi_is_scheduled(struct net_device *dev)
+static inline int napi_is_scheduled(struct napi_struct *napi)
 {
-	return test_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	return test_bit(NAPI_STATE_SCHED, &napi->state);
 }
 
 /**
@@ -2193,8 +2188,8 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 			     V_NEWTIMER(q->holdoff_tmr) | V_NEWINDEX(q->cidx));
 		return 0;
 	}
-	if (likely(__netif_rx_schedule_prep(qs->netdev)))
-		__netif_rx_schedule(qs->netdev);
+	if (likely(napi_schedule_prep(&qs->anapi->napi)))
+		__netif_rx_schedule(qs->netdev, &qs->anapi->napi);
 	return 1;
 }
 
@@ -2205,7 +2200,7 @@ static inline int handle_responses(struct adapter *adap, struct sge_rspq *q)
 irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->anapi->adapter;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
@@ -2224,7 +2219,7 @@ irqreturn_t t3_sge_intr_msix(int irq, void *cookie)
 irqreturn_t t3_sge_intr_msix_napi(int irq, void *cookie)
 {
 	struct sge_qset *qs = cookie;
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->anapi->adapter;
 	struct sge_rspq *q = &qs->rspq;
 
 	spin_lock(&q->lock);
@@ -2272,11 +2267,14 @@ static irqreturn_t t3_intr_msi(int irq, void *cookie)
 	return IRQ_HANDLED;
 }
 
-static int rspq_check_napi(struct net_device *dev, struct sge_rspq *q)
+static int rspq_check_napi(struct sge_qset *qs)
 {
-	if (!napi_is_scheduled(dev) && is_new_response(&q->desc[q->cidx], q)) {
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+	struct sge_rspq *q = &qs->rspq;
+
+	if (!napi_is_scheduled(&qs->anapi->napi) &&
+	    is_new_response(&q->desc[q->cidx], q)) {
+		if (likely(napi_schedule_prep(&qs->anapi->napi)))
+			__netif_rx_schedule(qs->netdev, &qs->anapi->napi);
 		return 1;
 	}
 	return 0;
@@ -2297,10 +2295,9 @@ irqreturn_t t3_intr_msi_napi(int irq, void *cookie)
 
 	spin_lock(&q->lock);
 
-	new_packets = rspq_check_napi(adap->sge.qs[0].netdev, q);
+	new_packets = rspq_check_napi(&adap->sge.qs[0]);
 	if (adap->params.nports == 2)
-		new_packets += rspq_check_napi(adap->sge.qs[1].netdev,
-					       &adap->sge.qs[1].rspq);
+		new_packets += rspq_check_napi(&adap->sge.qs[1]);
 	if (!new_packets && t3_slow_intr_handler(adap) == 0)
 		q->unhandled_irqs++;
 
@@ -2405,7 +2402,8 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 	u32 map;
 	struct net_device *dev;
 	struct adapter *adap = cookie;
-	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
+	struct sge_qset *qs0 = &adap->sge.qs[0];
+	struct sge_rspq *q0 = &qs0->rspq;
 
 	t3_write_reg(adap, A_PL_CLI, 0);
 	map = t3_read_reg(adap, A_SG_DATA_INTR);
@@ -2419,16 +2417,17 @@ static irqreturn_t t3b_intr_napi(int irq, void *cookie)
 		t3_slow_intr_handler(adap);
 
 	if (likely(map & 1)) {
-		dev = adap->sge.qs[0].netdev;
+		dev = qs0->netdev;
 
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+		if (likely(napi_schedule_prep(&qs0->anapi->napi)))
+			__netif_rx_schedule(dev, &qs0->anapi->napi);
 	}
 	if (map & 2) {
-		dev = adap->sge.qs[1].netdev;
+		struct sge_qset *qs1 = &adap->sge.qs[1];
 
-		if (likely(__netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+		dev = qs1->netdev;
+		if (likely(napi_schedule_prep(&qs1->anapi->napi)))
+			__netif_rx_schedule(dev, &qs1->anapi->napi);
 	}
 
 	spin_unlock(&q0->lock);
@@ -2508,7 +2507,7 @@ static void sge_timer_cb(unsigned long data)
 {
 	spinlock_t *lock;
 	struct sge_qset *qs = (struct sge_qset *)data;
-	struct adapter *adap = qs->netdev->priv;
+	struct adapter *adap = qs->anapi->adapter;
 
 	if (spin_trylock(&qs->txq[TXQ_ETH].lock)) {
 		reclaim_completed_tx(adap, &qs->txq[TXQ_ETH]);
@@ -2521,7 +2520,7 @@ static void sge_timer_cb(unsigned long data)
 	lock = (adap->flags & USING_MSIX) ? &qs->rspq.lock :
 	    &adap->sge.qs[0].rspq.lock;
 	if (spin_trylock_irq(lock)) {
-		if (!napi_is_scheduled(qs->netdev)) {
+		if (!napi_is_scheduled(&qs->anapi->napi)) {
 			u32 status = t3_read_reg(adap, A_SG_RSPQ_FL_STATUS);
 
 			if (qs->fl[0].credits < qs->fl[0].size)
@@ -2555,12 +2554,9 @@ static void sge_timer_cb(unsigned long data)
  */
 void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
 {
-	if (!qs->netdev)
-		return;
-
 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);/* can't be 0 */
 	qs->rspq.polling = p->polling;
-	qs->netdev->poll = p->polling ? napi_rx_handler : ofld_poll;
+	qs->anapi->napi.poll = p->polling ? napi_rx_handler : ofld_poll;
 }
 
 /**
@@ -2580,7 +2576,8 @@ void t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
  */
 int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 		      int irq_vec_idx, const struct qset_params *p,
-		      int ntxq, struct net_device *netdev)
+		      int ntxq, struct adapter_napi *anapi,
+		      struct net_device *dev)
 {
 	int i, ret = -ENOMEM;
 	struct sge_qset *q = &adapter->sge.qs[id];
@@ -2701,17 +2698,10 @@ int t3_sge_alloc_qset(struct adapter *adapter, unsigned int id, int nports,
 	}
 
 	spin_unlock(&adapter->sge.reg_lock);
-	q->netdev = netdev;
+	q->anapi = anapi;
+	q->netdev = dev;
 	t3_update_qset_coalesce(q, p);
 
-	/*
-	 * We use atalk_ptr as a backpointer to a qset.  In case a device is
-	 * associated with multiple queue sets only the first one sets
-	 * atalk_ptr.
-	 */
-	if (netdev->atalk_ptr == NULL)
-		netdev->atalk_ptr = q;
-
 	refill_fl(adapter, &q->fl[0], q->fl[0].size, GFP_KERNEL);
 	refill_fl(adapter, &q->fl[1], q->fl[1].size, GFP_KERNEL);
 	refill_rspq(adapter, &q->rspq, q->rspq.size - 1);
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 280313b..e25f5ec 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -539,6 +539,7 @@ struct nic {
 	struct csr __iomem *csr;
 	enum scb_cmd_lo cuc_cmd;
 	unsigned int cbs_avail;
+	struct napi_struct napi;
 	struct cb *cbs;
 	struct cb *cb_to_use;
 	struct cb *cb_to_send;
@@ -1974,35 +1975,31 @@ static irqreturn_t e100_intr(int irq, void *dev_id)
 	if(stat_ack & stat_ack_rnr)
 		nic->ru_running = RU_SUSPENDED;
 
-	if(likely(netif_rx_schedule_prep(netdev))) {
+	if(likely(netif_rx_schedule_prep(netdev, &nic->napi))) {
 		e100_disable_irq(nic);
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &nic->napi);
 	}
 
 	return IRQ_HANDLED;
 }
 
-static int e100_poll(struct net_device *netdev, int *budget)
+static int e100_poll(struct napi_struct *napi, int budget)
 {
-	struct nic *nic = netdev_priv(netdev);
-	unsigned int work_to_do = min(netdev->quota, *budget);
-	unsigned int work_done = 0;
+	struct nic *nic = container_of(napi, struct nic, napi);
+	struct net_device *netdev = nic->netdev;
+	int work_done = 0;
 	int tx_cleaned;
 
-	e100_rx_clean(nic, &work_done, work_to_do);
+	e100_rx_clean(nic, &work_done, budget);
 	tx_cleaned = e100_tx_clean(nic);
 
 	/* If no Rx and Tx cleanup work was done, exit polling mode. */
 	if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
-		netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 		e100_enable_irq(nic);
-		return 0;
 	}
 
-	*budget -= work_done;
-	netdev->quota -= work_done;
-
-	return 1;
+	return work_done;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2071,7 +2068,7 @@ static int e100_up(struct nic *nic)
 		nic->netdev->name, nic->netdev)))
 		goto err_no_irq;
 	netif_wake_queue(nic->netdev);
-	netif_poll_enable(nic->netdev);
+	napi_enable(&nic->napi);
 	/* enable ints _after_ enabling poll, preventing a race between
 	 * disable ints+schedule */
 	e100_enable_irq(nic);
@@ -2089,7 +2086,7 @@ err_rx_clean_list:
 static void e100_down(struct nic *nic)
 {
 	/* wait here for poll to complete */
-	netif_poll_disable(nic->netdev);
+	napi_disable(&nic->napi);
 	netif_stop_queue(nic->netdev);
 	e100_hw_reset(nic);
 	free_irq(nic->pdev->irq, nic->netdev);
@@ -2572,14 +2569,13 @@ static int __devinit e100_probe(struct pci_dev *pdev,
 	SET_ETHTOOL_OPS(netdev, &e100_ethtool_ops);
 	netdev->tx_timeout = e100_tx_timeout;
 	netdev->watchdog_timeo = E100_WATCHDOG_PERIOD;
-	netdev->poll = e100_poll;
-	netdev->weight = E100_NAPI_WEIGHT;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	netdev->poll_controller = e100_netpoll;
 #endif
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
 	nic = netdev_priv(netdev);
+	netif_napi_add(netdev, &nic->napi, e100_poll, E100_NAPI_WEIGHT);
 	nic->netdev = netdev;
 	nic->pdev = pdev;
 	nic->msg_enable = (1 << debug) - 1;
@@ -2733,7 +2729,7 @@ static int e100_suspend(struct pci_dev *pdev, pm_message_t state)
 	struct nic *nic = netdev_priv(netdev);
 
 	if (netif_running(netdev))
-		netif_poll_disable(nic->netdev);
+		napi_disable(&nic->napi);
 	del_timer_sync(&nic->watchdog);
 	netif_carrier_off(nic->netdev);
 	netif_device_detach(netdev);
@@ -2779,7 +2775,7 @@ static void e100_shutdown(struct pci_dev *pdev)
 	struct nic *nic = netdev_priv(netdev);
 
 	if (netif_running(netdev))
-		netif_poll_disable(nic->netdev);
+		napi_disable(&nic->napi);
 	del_timer_sync(&nic->watchdog);
 	netif_carrier_off(nic->netdev);
 
@@ -2804,12 +2800,13 @@ static void e100_shutdown(struct pci_dev *pdev)
 static pci_ers_result_t e100_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct nic *nic = netdev_priv(netdev);
 
 	/* Similar to calling e100_down(), but avoids adpater I/O. */
 	netdev->stop(netdev);
 
 	/* Detach; put netif into state similar to hotplug unplug. */
-	netif_poll_enable(netdev);
+	napi_enable(&nic->napi);
 	netif_device_detach(netdev);
 	pci_disable_device(pdev);
 
diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h
index 16a6edf..781ed99 100644
--- a/drivers/net/e1000/e1000.h
+++ b/drivers/net/e1000/e1000.h
@@ -300,6 +300,7 @@ struct e1000_adapter {
 				int cleaned_count);
 	struct e1000_rx_ring *rx_ring;      /* One per active queue */
 #ifdef CONFIG_E1000_NAPI
+	struct napi_struct napi;
 	struct net_device *polling_netdev;  /* One per active queue */
 #endif
 	int num_tx_queues;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index f48b659..5978c44 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -162,7 +162,7 @@ static irqreturn_t e1000_intr_msi(int irq, void *data);
 static boolean_t e1000_clean_tx_irq(struct e1000_adapter *adapter,
                                     struct e1000_tx_ring *tx_ring);
 #ifdef CONFIG_E1000_NAPI
-static int e1000_clean(struct net_device *poll_dev, int *budget);
+static int e1000_clean(struct napi_struct *napi, int budget);
 static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter,
                                     struct e1000_rx_ring *rx_ring,
                                     int *work_done, int work_to_do);
@@ -541,7 +541,7 @@ int e1000_up(struct e1000_adapter *adapter)
 	clear_bit(__E1000_DOWN, &adapter->flags);
 
 #ifdef CONFIG_E1000_NAPI
-	netif_poll_enable(adapter->netdev);
+	napi_enable(&adapter->napi);
 #endif
 	e1000_irq_enable(adapter);
 
@@ -630,7 +630,7 @@ e1000_down(struct e1000_adapter *adapter)
 	set_bit(__E1000_DOWN, &adapter->flags);
 
 #ifdef CONFIG_E1000_NAPI
-	netif_poll_disable(netdev);
+	napi_disable(&adapter->napi);
 #endif
 	e1000_irq_disable(adapter);
 
@@ -932,8 +932,7 @@ e1000_probe(struct pci_dev *pdev,
 	netdev->tx_timeout = &e1000_tx_timeout;
 	netdev->watchdog_timeo = 5 * HZ;
 #ifdef CONFIG_E1000_NAPI
-	netdev->poll = &e1000_clean;
-	netdev->weight = 64;
+	netif_napi_add(netdev, &adapter->napi, e1000_clean, 64);
 #endif
 	netdev->vlan_rx_register = e1000_vlan_rx_register;
 	netdev->vlan_rx_add_vid = e1000_vlan_rx_add_vid;
@@ -1146,7 +1145,7 @@ e1000_probe(struct pci_dev *pdev,
 	netif_carrier_off(netdev);
 	netif_stop_queue(netdev);
 #ifdef CONFIG_E1000_NAPI
-	netif_poll_disable(netdev);
+	napi_disable(&adapter->napi);
 #endif
 
 	strcpy(netdev->name, "eth%d");
@@ -1319,8 +1318,6 @@ e1000_sw_init(struct e1000_adapter *adapter)
 #ifdef CONFIG_E1000_NAPI
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		adapter->polling_netdev[i].priv = adapter;
-		adapter->polling_netdev[i].poll = &e1000_clean;
-		adapter->polling_netdev[i].weight = 64;
 		dev_hold(&adapter->polling_netdev[i]);
 		set_bit(__LINK_STATE_START, &adapter->polling_netdev[i].state);
 	}
@@ -1437,7 +1434,7 @@ e1000_open(struct net_device *netdev)
 	clear_bit(__E1000_DOWN, &adapter->flags);
 
 #ifdef CONFIG_E1000_NAPI
-	netif_poll_enable(netdev);
+	napi_enable(&adapter->napi);
 #endif
 
 	e1000_irq_enable(adapter);
@@ -1476,6 +1473,10 @@ e1000_close(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
+#ifdef CONFIG_E1000_NAPI
+	napi_disable(&adapter->napi);
+#endif
+
 	WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags));
 	e1000_down(adapter);
 	e1000_power_down_phy(adapter);
@@ -3780,12 +3781,12 @@ e1000_intr_msi(int irq, void *data)
 	}
 
 #ifdef CONFIG_E1000_NAPI
-	if (likely(netif_rx_schedule_prep(netdev))) {
+	if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &adapter->napi);
 	} else
 		e1000_irq_enable(adapter);
 #else
@@ -3865,12 +3866,12 @@ e1000_intr(int irq, void *data)
 		E1000_WRITE_REG(hw, IMC, ~0);
 		E1000_WRITE_FLUSH(hw);
 	}
-	if (likely(netif_rx_schedule_prep(netdev))) {
+	if (likely(netif_rx_schedule_prep(netdev, &adapter->napi))) {
 		adapter->total_tx_bytes = 0;
 		adapter->total_tx_packets = 0;
 		adapter->total_rx_bytes = 0;
 		adapter->total_rx_packets = 0;
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &adapter->napi);
 	} else
 		/* this really should not happen! if it does it is basically a
 		 * bug, but not a hard error, so enable ints and continue */
@@ -3918,10 +3919,10 @@ e1000_intr(int irq, void *data)
  **/
 
 static int
-e1000_clean(struct net_device *poll_dev, int *budget)
+e1000_clean(struct napi_struct *napi, int budget)
 {
-	struct e1000_adapter *adapter;
-	int work_to_do = min(*budget, poll_dev->quota);
+	struct e1000_adapter *adapter = container_of(napi, struct e1000_adapter, napi);
+	struct net_device *poll_dev = adapter->netdev;
 	int tx_cleaned = 0, work_done = 0;
 
 	/* Must NOT use netdev_priv macro here. */
@@ -3942,23 +3943,19 @@ e1000_clean(struct net_device *poll_dev, int *budget)
 	}
 
 	adapter->clean_rx(adapter, &adapter->rx_ring[0],
-	                  &work_done, work_to_do);
-
-	*budget -= work_done;
-	poll_dev->quota -= work_done;
+	                  &work_done, budget);
 
 	/* If no Tx and not enough Rx work done, exit the polling mode */
-	if ((!tx_cleaned && (work_done == 0)) ||
+	if ((!tx_cleaned && (work_done < budget)) ||
 	   !netif_running(poll_dev)) {
 quit_polling:
 		if (likely(adapter->itr_setting & 3))
 			e1000_set_itr(adapter);
-		netif_rx_complete(poll_dev);
+		netif_rx_complete(poll_dev, napi);
 		e1000_irq_enable(adapter);
-		return 0;
 	}
 
-	return 1;
+	return work_done;
 }
 
 #endif
diff --git a/drivers/net/ehea/ehea.h b/drivers/net/ehea/ehea.h
index 8ee2c2c..eb019c8 100644
--- a/drivers/net/ehea/ehea.h
+++ b/drivers/net/ehea/ehea.h
@@ -351,6 +351,7 @@ struct ehea_q_skb_arr {
  * Port resources
  */
 struct ehea_port_res {
+	struct napi_struct napi;
 	struct port_stats p_stats;
 	struct ehea_mr send_mr;       	/* send memory region */
 	struct ehea_mr recv_mr;       	/* receive memory region */
@@ -362,7 +363,6 @@ struct ehea_port_res {
 	struct ehea_cq *send_cq;
 	struct ehea_cq *recv_cq;
 	struct ehea_eq *eq;
-	struct net_device *d_netdev;
 	struct ehea_q_skb_arr rq1_skba;
 	struct ehea_q_skb_arr rq2_skba;
 	struct ehea_q_skb_arr rq3_skba;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index 58702f5..3353e66 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -389,9 +389,9 @@ static int ehea_treat_poll_error(struct ehea_port_res *pr, int rq,
 	return 0;
 }
 
-static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
-					struct ehea_port_res *pr,
-					int *budget)
+static int ehea_proc_rwqes(struct net_device *dev,
+			   struct ehea_port_res *pr,
+			   int budget)
 {
 	struct ehea_port *port = pr->port;
 	struct ehea_qp *qp = pr->qp;
@@ -404,18 +404,16 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 	int skb_arr_rq2_len = pr->rq2_skba.len;
 	int skb_arr_rq3_len = pr->rq3_skba.len;
 	int processed, processed_rq1, processed_rq2, processed_rq3;
-	int wqe_index, last_wqe_index, rq, my_quota, port_reset;
+	int wqe_index, last_wqe_index, rq, port_reset;
 
 	processed = processed_rq1 = processed_rq2 = processed_rq3 = 0;
 	last_wqe_index = 0;
-	my_quota = min(*budget, dev->quota);
 
 	cqe = ehea_poll_rq1(qp, &wqe_index);
-	while ((my_quota > 0) && cqe) {
+	while ((processed < budget) && cqe) {
 		ehea_inc_rq1(qp);
 		processed_rq1++;
 		processed++;
-		my_quota--;
 		if (netif_msg_rx_status(port))
 			ehea_dump(cqe, sizeof(*cqe), "CQE");
 
@@ -430,14 +428,14 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 					if (netif_msg_rx_err(port))
 						ehea_error("LL rq1: skb=NULL");
 
-					skb = netdev_alloc_skb(port->netdev,
+					skb = netdev_alloc_skb(dev,
 							       EHEA_L_PKT_SIZE);
 					if (!skb)
 						break;
 				}
 				skb_copy_to_linear_data(skb, ((char*)cqe) + 64,
 						 cqe->num_bytes_transfered - 4);
-				ehea_fill_skb(port->netdev, skb, cqe);
+				ehea_fill_skb(dev, skb, cqe);
 			} else if (rq == 2) {  /* RQ2 */
 				skb = get_skb_by_index(skb_arr_rq2,
 						       skb_arr_rq2_len, cqe);
@@ -446,7 +444,7 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 						ehea_error("rq2: skb=NULL");
 					break;
 				}
-				ehea_fill_skb(port->netdev, skb, cqe);
+				ehea_fill_skb(dev, skb, cqe);
 				processed_rq2++;
 			} else {  /* RQ3 */
 				skb = get_skb_by_index(skb_arr_rq3,
@@ -456,7 +454,7 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 						ehea_error("rq3: skb=NULL");
 					break;
 				}
-				ehea_fill_skb(port->netdev, skb, cqe);
+				ehea_fill_skb(dev, skb, cqe);
 				processed_rq3++;
 			}
 
@@ -480,14 +478,12 @@ static struct ehea_cqe *ehea_proc_rwqes(struct net_device *dev,
 	}
 
 	pr->rx_packets += processed;
-	*budget -= processed;
 
 	ehea_refill_rq1(pr, last_wqe_index, processed_rq1);
 	ehea_refill_rq2(pr, processed_rq2);
 	ehea_refill_rq3(pr, processed_rq3);
 
-	cqe = ehea_poll_rq1(qp, &wqe_index);
-	return cqe;
+	return processed;
 }
 
 static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
@@ -551,12 +547,13 @@ static struct ehea_cqe *ehea_proc_cqes(struct ehea_port_res *pr, int my_quota)
 
 #define EHEA_NAPI_POLL_NUM_BEFORE_IRQ 16
 
-static int ehea_poll(struct net_device *dev, int *budget)
+static int ehea_poll(struct napi_struct *napi, int budget)
 {
-	struct ehea_port_res *pr = dev->priv;
+	struct ehea_port_res *pr = container_of(napi, struct ehea_port_res, napi);
+	struct net_device *dev = pr->port->netdev;
 	struct ehea_cqe *cqe;
 	struct ehea_cqe *cqe_skb = NULL;
-	int force_irq, wqe_index;
+	int force_irq, wqe_index, rx;
 
 	cqe = ehea_poll_rq1(pr->qp, &wqe_index);
 	cqe_skb = ehea_poll_cq(pr->send_cq);
@@ -565,7 +562,7 @@ static int ehea_poll(struct net_device *dev, int *budget)
 
 	if ((!cqe && !cqe_skb) || force_irq) {
 		pr->poll_counter = 0;
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		ehea_reset_cq_ep(pr->recv_cq);
 		ehea_reset_cq_ep(pr->send_cq);
 		ehea_reset_cq_n1(pr->recv_cq);
@@ -576,17 +573,18 @@ static int ehea_poll(struct net_device *dev, int *budget)
 		if (!cqe && !cqe_skb)
 			return 0;
 
-		if (!netif_rx_reschedule(dev, dev->quota))
+		if (!netif_rx_reschedule(dev, napi))
 			return 0;
 	}
 
-	cqe = ehea_proc_rwqes(dev, pr, budget);
+	rx = ehea_proc_rwqes(dev, pr, budget);
+	cqe = ehea_poll_rq1(pr->qp, &wqe_index);
 	cqe_skb = ehea_proc_cqes(pr, 300);
 
 	if (cqe || cqe_skb)
 		pr->poll_counter++;
 
-	return 1;
+	return rx;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -610,7 +608,7 @@ static irqreturn_t ehea_recv_irq_handler(int irq, void *param)
 {
 	struct ehea_port_res *pr = param;
 
-	netif_rx_schedule(pr->d_netdev);
+	netif_rx_schedule(pr->port->netdev, &pr->napi);
 
 	return IRQ_HANDLED;
 }
@@ -1224,14 +1222,7 @@ static int ehea_init_port_res(struct ehea_port *port, struct ehea_port_res *pr,
 
 	kfree(init_attr);
 
-	pr->d_netdev = alloc_netdev(0, "", ether_setup);
-	if (!pr->d_netdev)
-		goto out_free;
-	pr->d_netdev->priv = pr;
-	pr->d_netdev->weight = 64;
-	pr->d_netdev->poll = ehea_poll;
-	set_bit(__LINK_STATE_START, &pr->d_netdev->state);
-	strcpy(pr->d_netdev->name, port->netdev->name);
+	netif_napi_add(pr->port->netdev, &pr->napi, ehea_poll, 64);
 
 	ret = 0;
 	goto out;
@@ -1254,8 +1245,6 @@ static int ehea_clean_portres(struct ehea_port *port, struct ehea_port_res *pr)
 {
 	int ret, i;
 
-	free_netdev(pr->d_netdev);
-
 	ret = ehea_destroy_qp(pr->qp);
 
 	if (!ret) {
@@ -2278,9 +2267,7 @@ static int ehea_down(struct net_device *dev)
 	ehea_free_interrupts(dev);
 
 	for (i = 0; i < port->num_def_qps; i++)
-		while (test_bit(__LINK_STATE_RX_SCHED,
-				&port->port_res[i].d_netdev->state))
-			msleep(1);
+		napi_disable(&port->port_res[i].napi);
 
 	port->state = EHEA_PORT_DOWN;
 
@@ -2643,11 +2630,9 @@ struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
 	memcpy(dev->dev_addr, &port->mac_addr, ETH_ALEN);
 
 	dev->open = ehea_open;
-	dev->poll = ehea_poll_firstqueue;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = ehea_netpoll;
 #endif
-	dev->weight = 64;
 	dev->stop = ehea_stop;
 	dev->hard_start_xmit = ehea_start_xmit;
 	dev->get_stats = ehea_get_stats;
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 1197784..211909d 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -262,6 +262,7 @@ struct epic_private {
 	/* Ring pointers. */
 	spinlock_t lock;				/* Group with Tx control cache line. */
 	spinlock_t napi_lock;
+	struct napi_struct napi;
 	unsigned int reschedule_in_poll;
 	unsigned int cur_tx, dirty_tx;
 
@@ -294,7 +295,7 @@ static void epic_tx_timeout(struct net_device *dev);
 static void epic_init_ring(struct net_device *dev);
 static int epic_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static int epic_rx(struct net_device *dev, int budget);
-static int epic_poll(struct net_device *dev, int *budget);
+static int epic_poll(struct napi_struct *napi, int budget);
 static irqreturn_t epic_interrupt(int irq, void *dev_instance);
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static const struct ethtool_ops netdev_ethtool_ops;
@@ -487,8 +488,7 @@ static int __devinit epic_init_one (struct pci_dev *pdev,
 	dev->ethtool_ops = &netdev_ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 	dev->tx_timeout = &epic_tx_timeout;
-	dev->poll = epic_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &ep->napi, epic_poll, 64);
 
 	ret = register_netdev(dev);
 	if (ret < 0)
@@ -1103,9 +1103,9 @@ static irqreturn_t epic_interrupt(int irq, void *dev_instance)
 
 	if ((status & EpicNapiEvent) && !ep->reschedule_in_poll) {
 		spin_lock(&ep->napi_lock);
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &ep->napi)) {
 			epic_napi_irq_off(dev, ep);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &ep->napi);
 		} else
 			ep->reschedule_in_poll++;
 		spin_unlock(&ep->napi_lock);
@@ -1257,26 +1257,22 @@ static void epic_rx_err(struct net_device *dev, struct epic_private *ep)
 		outw(RxQueued, ioaddr + COMMAND);
 }
 
-static int epic_poll(struct net_device *dev, int *budget)
+static int epic_poll(struct napi_struct *napi, int budget)
 {
-	struct epic_private *ep = dev->priv;
-	int work_done = 0, orig_budget;
+	struct epic_private *ep = container_of(napi, struct epic_private, napi);
+	struct net_device *dev = ep->mii.dev;
+	int work_done = 0;
 	long ioaddr = dev->base_addr;
 
-	orig_budget = (*budget > dev->quota) ? dev->quota : *budget;
-
 rx_action:
 
 	epic_tx(dev, ep);
 
-	work_done += epic_rx(dev, *budget);
+	work_done += epic_rx(dev, budget);
 
 	epic_rx_err(dev, ep);
 
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (netif_running(dev) && (work_done < orig_budget)) {
+	if (netif_running(dev) && (work_done < budget)) {
 		unsigned long flags;
 		int more;
 
@@ -1286,7 +1282,7 @@ rx_action:
 
 		more = ep->reschedule_in_poll;
 		if (!more) {
-			__netif_rx_complete(dev);
+			__netif_rx_complete(dev, napi);
 			outl(EpicNapiEvent, ioaddr + INTSTAT);
 			epic_napi_irq_on(dev, ep);
 		} else
@@ -1298,7 +1294,7 @@ rx_action:
 			goto rx_action;
 	}
 
-	return (work_done >= orig_budget);
+	return work_done;
 }
 
 static int epic_close(struct net_device *dev)
@@ -1309,6 +1305,7 @@ static int epic_close(struct net_device *dev)
 	int i;
 
 	netif_stop_queue(dev);
+	napi_disable(&ep->napi);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: Shutting down ethercard, status was %2.2x.\n",
diff --git a/drivers/net/fec_8xx/fec_8xx.h b/drivers/net/fec_8xx/fec_8xx.h
index 5af60b0..f3b1c6f 100644
--- a/drivers/net/fec_8xx/fec_8xx.h
+++ b/drivers/net/fec_8xx/fec_8xx.h
@@ -105,6 +105,8 @@ struct fec;
 struct fec_enet_private {
 	spinlock_t lock;	/* during all ops except TX pckt processing */
 	spinlock_t tx_lock;	/* during fec_start_xmit and fec_tx         */
+	struct net_device *dev;
+	struct napi_struct napi;
 	int fecno;
 	struct fec *fecp;
 	const struct fec_platform_info *fpi;
diff --git a/drivers/net/fec_8xx/fec_main.c b/drivers/net/fec_8xx/fec_main.c
index e5502af..e96a7b0 100644
--- a/drivers/net/fec_8xx/fec_main.c
+++ b/drivers/net/fec_8xx/fec_main.c
@@ -465,9 +465,9 @@ void fec_stop(struct net_device *dev)
 }
 
 /* common receive function */
-static int fec_enet_rx_common(struct net_device *dev, int *budget)
+static int fec_enet_rx_common(struct fec_enet_private *fep,
+			      struct net_device *dev, int budget)
 {
-	struct fec_enet_private *fep = netdev_priv(dev);
 	fec_t *fecp = fep->fecp;
 	const struct fec_platform_info *fpi = fep->fpi;
 	cbd_t *bdp;
@@ -475,11 +475,8 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 	int received = 0;
 	__u16 pkt_len, sc;
 	int curidx;
-	int rx_work_limit;
 
 	if (fpi->use_napi) {
-		rx_work_limit = min(dev->quota, *budget);
-
 		if (!netif_running(dev))
 			return 0;
 	}
@@ -530,11 +527,6 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 			BUG_ON(skbn == NULL);
 
 		} else {
-
-			/* napi, got packet but no quota */
-			if (fpi->use_napi && --rx_work_limit < 0)
-				break;
-
 			skb = fep->rx_skbuff[curidx];
 			BUG_ON(skb == NULL);
 
@@ -599,25 +591,24 @@ static int fec_enet_rx_common(struct net_device *dev, int *budget)
 		 * able to keep up at the expense of system resources.
 		 */
 		FW(fecp, r_des_active, 0x01000000);
+
+		if (received >= budget)
+			break;
+
 	}
 
 	fep->cur_rx = bdp;
 
 	if (fpi->use_napi) {
-		dev->quota -= received;
-		*budget -= received;
-
-		if (rx_work_limit < 0)
-			return 1;	/* not done */
+		if (received < budget) {
+			netif_rx_complete(dev, &fep->napi);
 
-		/* done */
-		netif_rx_complete(dev);
-
-		/* enable RX interrupt bits */
-		FS(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB);
+			/* enable RX interrupt bits */
+			FS(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB);
+		}
 	}
 
-	return 0;
+	return received;
 }
 
 static void fec_enet_tx(struct net_device *dev)
@@ -743,12 +734,12 @@ fec_enet_interrupt(int irq, void *dev_id)
 
 		if ((int_events & FEC_ENET_RXF) != 0) {
 			if (!fpi->use_napi)
-				fec_enet_rx_common(dev, NULL);
+				fec_enet_rx_common(fep, dev, ~0);
 			else {
-				if (netif_rx_schedule_prep(dev)) {
+				if (netif_rx_schedule_prep(dev, &fep->napi)) {
 					/* disable rx interrupts */
 					FC(fecp, imask, FEC_ENET_RXF | FEC_ENET_RXB);
-					__netif_rx_schedule(dev);
+					__netif_rx_schedule(dev, &fep->napi);
 				} else {
 					printk(KERN_ERR DRV_MODULE_NAME
 					       ": %s driver bug! interrupt while in poll!\n",
@@ -932,6 +923,7 @@ static int fec_enet_close(struct net_device *dev)
 	unsigned long flags;
 
 	netif_stop_queue(dev);
+	napi_disable(&fep->napi);
 	netif_carrier_off(dev);
 
 	if (fpi->use_mdio)
@@ -955,9 +947,12 @@ static struct net_device_stats *fec_enet_get_stats(struct net_device *dev)
 	return &fep->stats;
 }
 
-static int fec_enet_poll(struct net_device *dev, int *budget)
+static int fec_enet_poll(struct napi_struct *napi, int budget)
 {
-	return fec_enet_rx_common(dev, budget);
+	struct fec_enet_private *fep = container_of(napi, struct fec_enet_private, napi);
+	struct net_device *dev = fep->dev;
+
+	return fec_enet_rx_common(fep, dev, budget);
 }
 
 /*************************************************************************/
@@ -1107,6 +1102,7 @@ int fec_8xx_init_one(const struct fec_platform_info *fpi,
 	SET_MODULE_OWNER(dev);
 
 	fep = netdev_priv(dev);
+	fep->dev = dev;
 
 	/* partial reset of FEC */
 	fec_whack_reset(fecp);
@@ -1172,10 +1168,10 @@ int fec_8xx_init_one(const struct fec_platform_info *fpi,
 	dev->get_stats = fec_enet_get_stats;
 	dev->set_multicast_list = fec_set_multicast_list;
 	dev->set_mac_address = fec_set_mac_address;
-	if (fpi->use_napi) {
-		dev->poll = fec_enet_poll;
-		dev->weight = fpi->napi_weight;
-	}
+	if (fpi->use_napi)
+		netif_napi_add(dev, &fep->napi,
+			       fec_enet_poll, fpi->napi_weight);
+
 	dev->ethtool_ops = &fec_ethtool_ops;
 	dev->do_ioctl = fec_ioctl;
 
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 69f5f36..37732f9 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -745,6 +745,9 @@ struct nv_skb_map {
 struct fe_priv {
 	spinlock_t lock;
 
+	struct net_device *dev;
+	struct napi_struct napi;
+
 	/* General data:
 	 * Locking: spin_lock(&np->lock); */
 	struct net_device_stats stats;
@@ -1586,9 +1589,10 @@ static int nv_alloc_rx_optimized(struct net_device *dev)
 static void nv_do_rx_refill(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *) data;
+	struct fe_priv *np = netdev_priv(dev);
 
 	/* Just reschedule NAPI rx processing */
-	netif_rx_schedule(dev);
+	netif_rx_schedule(dev, &np->napi);
 }
 #else
 static void nv_do_rx_refill(unsigned long data)
@@ -2997,7 +3001,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data)
 
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &np->napi);
 
 			/* Disable furthur receive irq's */
 			spin_lock(&np->lock);
@@ -3010,7 +3014,7 @@ static irqreturn_t nv_nic_irq(int foo, void *data)
 			spin_unlock(&np->lock);
 		}
 #else
-		if (nv_rx_process(dev, dev->weight)) {
+		if (nv_rx_process(dev, RX_WORK_PER_LOOP)) {
 			if (unlikely(nv_alloc_rx(dev))) {
 				spin_lock(&np->lock);
 				if (!np->in_shutdown)
@@ -3114,7 +3118,7 @@ static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
 
 #ifdef CONFIG_FORCEDETH_NAPI
 		if (events & NVREG_IRQ_RX_ALL) {
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &np->napi);
 
 			/* Disable furthur receive irq's */
 			spin_lock(&np->lock);
@@ -3127,7 +3131,7 @@ static irqreturn_t nv_nic_irq_optimized(int foo, void *data)
 			spin_unlock(&np->lock);
 		}
 #else
-		if (nv_rx_process_optimized(dev, dev->weight)) {
+		if (nv_rx_process_optimized(dev, RX_WORK_PER_LOOP)) {
 			if (unlikely(nv_alloc_rx_optimized(dev))) {
 				spin_lock(&np->lock);
 				if (!np->in_shutdown)
@@ -3245,19 +3249,19 @@ static irqreturn_t nv_nic_irq_tx(int foo, void *data)
 }
 
 #ifdef CONFIG_FORCEDETH_NAPI
-static int nv_napi_poll(struct net_device *dev, int *budget)
+static int nv_napi_poll(struct napi_struct *napi, int budget)
 {
-	int pkts, limit = min(*budget, dev->quota);
-	struct fe_priv *np = netdev_priv(dev);
+	struct fe_priv *np = container_of(napi, struct fe_priv, napi);
+	struct net_device *dev = np->dev;
 	u8 __iomem *base = get_hwbase(dev);
 	unsigned long flags;
-	int retcode;
+	int pkts, retcode;
 
 	if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-		pkts = nv_rx_process(dev, limit);
+		pkts = nv_rx_process(dev, budget);
 		retcode = nv_alloc_rx(dev);
 	} else {
-		pkts = nv_rx_process_optimized(dev, limit);
+		pkts = nv_rx_process_optimized(dev, budget);
 		retcode = nv_alloc_rx_optimized(dev);
 	}
 
@@ -3268,13 +3272,12 @@ static int nv_napi_poll(struct net_device *dev, int *budget)
 		spin_unlock_irqrestore(&np->lock, flags);
 	}
 
-	if (pkts < limit) {
-		/* all done, no more packets present */
-		netif_rx_complete(dev);
-
+	if (pkts < budget) {
 		/* re-enable receive interrupts */
 		spin_lock_irqsave(&np->lock, flags);
 
+		__netif_rx_complete(dev, napi);
+
 		np->irqmask |= NVREG_IRQ_RX_ALL;
 		if (np->msi_flags & NV_MSI_X_ENABLED)
 			writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
@@ -3282,13 +3285,8 @@ static int nv_napi_poll(struct net_device *dev, int *budget)
 			writel(np->irqmask, base + NvRegIrqMask);
 
 		spin_unlock_irqrestore(&np->lock, flags);
-		return 0;
-	} else {
-		/* used up our quantum, so reschedule */
-		dev->quota -= pkts;
-		*budget -= pkts;
-		return 1;
 	}
+	return pkts;
 }
 #endif
 
@@ -3296,6 +3294,7 @@ static int nv_napi_poll(struct net_device *dev, int *budget)
 static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 {
 	struct net_device *dev = (struct net_device *) data;
+	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
 	u32 events;
 
@@ -3303,7 +3302,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 	writel(NVREG_IRQ_RX_ALL, base + NvRegMSIXIrqStatus);
 
 	if (events) {
-		netif_rx_schedule(dev);
+		netif_rx_schedule(dev, &np->napi);
 		/* disable receive interrupts on the nic */
 		writel(NVREG_IRQ_RX_ALL, base + NvRegIrqMask);
 		pci_push(base);
@@ -3329,7 +3328,7 @@ static irqreturn_t nv_nic_irq_rx(int foo, void *data)
 		if (!(events & np->irqmask))
 			break;
 
-		if (nv_rx_process_optimized(dev, dev->weight)) {
+		if (nv_rx_process_optimized(dev, RX_WORK_PER_LOOP)) {
 			if (unlikely(nv_alloc_rx_optimized(dev))) {
 				spin_lock_irqsave(&np->lock, flags);
 				if (!np->in_shutdown)
@@ -4620,7 +4619,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
 	if (test->flags & ETH_TEST_FL_OFFLINE) {
 		if (netif_running(dev)) {
 			netif_stop_queue(dev);
-			netif_poll_disable(dev);
+			napi_disable(&np->napi);
 			netif_tx_lock_bh(dev);
 			spin_lock_irq(&np->lock);
 			nv_disable_hw_interrupts(dev, np->irqmask);
@@ -4679,7 +4678,7 @@ static void nv_self_test(struct net_device *dev, struct ethtool_test *test, u64
 			nv_start_rx(dev);
 			nv_start_tx(dev);
 			netif_start_queue(dev);
-			netif_poll_enable(dev);
+			napi_enable(&np->napi);
 			nv_enable_hw_interrupts(dev, np->irqmask);
 		}
 	}
@@ -4911,7 +4910,7 @@ static int nv_open(struct net_device *dev)
 	nv_start_rx(dev);
 	nv_start_tx(dev);
 	netif_start_queue(dev);
-	netif_poll_enable(dev);
+	napi_enable(&np->napi);
 
 	if (ret) {
 		netif_carrier_on(dev);
@@ -4942,7 +4941,7 @@ static int nv_close(struct net_device *dev)
 	spin_lock_irq(&np->lock);
 	np->in_shutdown = 1;
 	spin_unlock_irq(&np->lock);
-	netif_poll_disable(dev);
+	napi_disable(&np->napi);
 	synchronize_irq(dev->irq);
 
 	del_timer_sync(&np->oom_kick);
@@ -4994,6 +4993,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 		goto out;
 
 	np = netdev_priv(dev);
+	np->dev = dev;
 	np->pci_dev = pci_dev;
 	spin_lock_init(&np->lock);
 	SET_MODULE_OWNER(dev);
@@ -5155,9 +5155,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = nv_poll_controller;
 #endif
-	dev->weight = RX_WORK_PER_LOOP;
 #ifdef CONFIG_FORCEDETH_NAPI
-	dev->poll = nv_napi_poll;
+	netif_napi_add(dev, &np->napi, nv_napi_poll, RX_WORK_PER_LOOP);
 #endif
 	SET_ETHTOOL_OPS(dev, &ops);
 	dev->tx_timeout = nv_tx_timeout;
diff --git a/drivers/net/fs_enet/fs_enet-main.c b/drivers/net/fs_enet/fs_enet-main.c
index a4a2a0e..43f86d2 100644
--- a/drivers/net/fs_enet/fs_enet-main.c
+++ b/drivers/net/fs_enet/fs_enet-main.c
@@ -70,18 +70,16 @@ static void fs_set_multicast_list(struct net_device *dev)
 }
 
 /* NAPI receive function */
-static int fs_enet_rx_napi(struct net_device *dev, int *budget)
+static int fs_enet_rx_napi(struct napi_struct *napi, int budget)
 {
-	struct fs_enet_private *fep = netdev_priv(dev);
+	struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi);
+	struct net_device *dev = fep->dev;
 	const struct fs_platform_info *fpi = fep->fpi;
 	cbd_t *bdp;
 	struct sk_buff *skb, *skbn, *skbt;
 	int received = 0;
 	u16 pkt_len, sc;
 	int curidx;
-	int rx_work_limit = 0;	/* pacify gcc */
-
-	rx_work_limit = min(dev->quota, *budget);
 
 	if (!netif_running(dev))
 		return 0;
@@ -96,7 +94,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 	(*fep->ops->napi_clear_rx_event)(dev);
 
 	while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) {
-
 		curidx = bdp - fep->rx_bd_base;
 
 		/*
@@ -136,11 +133,6 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 			skbn = skb;
 
 		} else {
-
-			/* napi, got packet but no quota */
-			if (--rx_work_limit < 0)
-				break;
-
 			skb = fep->rx_skbuff[curidx];
 
 			dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp),
@@ -199,22 +191,19 @@ static int fs_enet_rx_napi(struct net_device *dev, int *budget)
 			bdp = fep->rx_bd_base;
 
 		(*fep->ops->rx_bd_done)(dev);
+
+		if (received >= budget)
+			break;
 	}
 
 	fep->cur_rx = bdp;
 
-	dev->quota -= received;
-	*budget -= received;
-
-	if (rx_work_limit < 0)
-		return 1;	/* not done */
-
-	/* done */
-	netif_rx_complete(dev);
-
-	(*fep->ops->napi_enable_rx)(dev);
-
-	return 0;
+	if (received < budget) {
+		/* done */
+		netif_rx_complete(dev, napi);
+		(*fep->ops->napi_enable_rx)(dev);
+	}
+	return received;
 }
 
 /* non NAPI receive function */
@@ -470,7 +459,7 @@ fs_enet_interrupt(int irq, void *dev_id)
 			if (!fpi->use_napi)
 				fs_enet_rx_non_napi(dev);
 			else {
-				napi_ok = netif_rx_schedule_prep(dev);
+				napi_ok = napi_schedule_prep(&fep->napi);
 
 				(*fep->ops->napi_disable_rx)(dev);
 				(*fep->ops->clear_int_events)(dev, fep->ev_napi_rx);
@@ -478,7 +467,7 @@ fs_enet_interrupt(int irq, void *dev_id)
 				/* NOTE: it is possible for FCCs in NAPI mode    */
 				/* to submit a spurious interrupt while in poll  */
 				if (napi_ok)
-					__netif_rx_schedule(dev);
+					__netif_rx_schedule(dev, &fep->napi);
 			}
 		}
 
@@ -823,6 +812,7 @@ static int fs_enet_close(struct net_device *dev)
 
 	netif_stop_queue(dev);
 	netif_carrier_off(dev);
+	napi_disable(&fep->napi);
 	phy_stop(fep->phydev);
 
 	spin_lock_irqsave(&fep->lock, flags);
@@ -1047,10 +1037,10 @@ static struct net_device *fs_init_instance(struct device *dev,
 	ndev->stop = fs_enet_close;
 	ndev->get_stats = fs_enet_get_stats;
 	ndev->set_multicast_list = fs_set_multicast_list;
-	if (fpi->use_napi) {
-		ndev->poll = fs_enet_rx_napi;
-		ndev->weight = fpi->napi_weight;
-	}
+	if (fpi->use_napi)
+		netif_napi_add(ndev, &fep->napi,
+			       fs_enet_rx_napi, fpi->napi_weight);
+
 	ndev->ethtool_ops = &fs_ethtool_ops;
 	ndev->do_ioctl = fs_ioctl;
 
diff --git a/drivers/net/fs_enet/fs_enet.h b/drivers/net/fs_enet/fs_enet.h
index 569be22..46d0606 100644
--- a/drivers/net/fs_enet/fs_enet.h
+++ b/drivers/net/fs_enet/fs_enet.h
@@ -121,6 +121,7 @@ struct fs_enet_mii_bus {
 };
 
 struct fs_enet_private {
+	struct napi_struct napi;
 	struct device *dev;	/* pointer back to the device (must be initialized first) */
 	spinlock_t lock;	/* during all ops except TX pckt processing */
 	spinlock_t tx_lock;	/* during fs_start_xmit and fs_tx         */
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index f926905..89c2fb4 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -134,7 +134,7 @@ static void gfar_configure_serdes(struct net_device *dev);
 extern int gfar_local_mdio_write(struct gfar_mii *regs, int mii_id, int regnum, u16 value);
 extern int gfar_local_mdio_read(struct gfar_mii *regs, int mii_id, int regnum);
 #ifdef CONFIG_GFAR_NAPI
-static int gfar_poll(struct net_device *dev, int *budget);
+static int gfar_poll(struct napi_struct *napi, int budget);
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void gfar_netpoll(struct net_device *dev);
@@ -188,6 +188,7 @@ static int gfar_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv = netdev_priv(dev);
+	priv->dev = dev;
 
 	/* Set the info in the priv to the current info */
 	priv->einfo = einfo;
@@ -262,8 +263,7 @@ static int gfar_probe(struct platform_device *pdev)
 	dev->tx_timeout = gfar_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
 #ifdef CONFIG_GFAR_NAPI
-	dev->poll = gfar_poll;
-	dev->weight = GFAR_DEV_WEIGHT;
+	netif_napi_add(dev, &priv->napi, gfar_poll, GFAR_DEV_WEIGHT);
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = gfar_netpoll;
@@ -1102,6 +1102,9 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static int gfar_close(struct net_device *dev)
 {
 	struct gfar_private *priv = netdev_priv(dev);
+
+	napi_disable(&priv->napi);
+
 	stop_gfar(dev);
 
 	/* Disconnect from the PHY */
@@ -1318,7 +1321,7 @@ struct sk_buff * gfar_new_skb(struct net_device *dev, struct rxbd8 *bdp)
 		return NULL;
 
 	alignamount = RXBUF_ALIGNMENT -
-		(((unsigned) skb->data) & (RXBUF_ALIGNMENT - 1));
+		(((unsigned long) skb->data) & (RXBUF_ALIGNMENT - 1));
 
 	/* We need the data buffer to be aligned properly.  We will reserve
 	 * as many bytes as needed to align the data properly
@@ -1390,12 +1393,12 @@ irqreturn_t gfar_receive(int irq, void *dev_id)
 
 	/* support NAPI */
 #ifdef CONFIG_GFAR_NAPI
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &priv->napi)) {
 		tempval = gfar_read(&priv->regs->imask);
 		tempval &= IMASK_RX_DISABLED;
 		gfar_write(&priv->regs->imask, tempval);
 
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &priv->napi);
 	} else {
 		if (netif_msg_rx_err(priv))
 			printk(KERN_DEBUG "%s: receive called twice (%x)[%x]\n",
@@ -1569,23 +1572,16 @@ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit)
 }
 
 #ifdef CONFIG_GFAR_NAPI
-static int gfar_poll(struct net_device *dev, int *budget)
+static int gfar_poll(struct napi_struct *napi, int budget)
 {
+	struct gfar_private *priv = container_of(napi, struct gfar_private, napi);
+	struct net_device *dev = priv->dev;
 	int howmany;
-	struct gfar_private *priv = netdev_priv(dev);
-	int rx_work_limit = *budget;
 
-	if (rx_work_limit > dev->quota)
-		rx_work_limit = dev->quota;
+	howmany = gfar_clean_rx_ring(dev, budget);
 
-	howmany = gfar_clean_rx_ring(dev, rx_work_limit);
-
-	dev->quota -= howmany;
-	rx_work_limit -= howmany;
-	*budget -= howmany;
-
-	if (rx_work_limit > 0) {
-		netif_rx_complete(dev);
+	if (howmany < budget) {
+		netif_rx_complete(dev, napi);
 
 		/* Clear the halt bit in RSTAT */
 		gfar_write(&priv->regs->rstat, RSTAT_CLEAR_RHALT);
@@ -1601,8 +1597,7 @@ static int gfar_poll(struct net_device *dev, int *budget)
 			gfar_write(&priv->regs->rxic, 0);
 	}
 
-	/* Return 1 if there's more work to do */
-	return (rx_work_limit > 0) ? 0 : 1;
+	return howmany;
 }
 #endif
 
diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
index d8e779c..b8714e0 100644
--- a/drivers/net/gianfar.h
+++ b/drivers/net/gianfar.h
@@ -691,6 +691,9 @@ struct gfar_private {
 	/* RX Locked fields */
 	spinlock_t rxlock;
 
+	struct net_device *dev;
+	struct napi_struct napi;
+
 	/* skb array and index */
 	struct sk_buff ** rx_skbuff;
 	u16 skb_currx;
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index d96eb72..62ff425 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -83,7 +83,7 @@
 static int ibmveth_open(struct net_device *dev);
 static int ibmveth_close(struct net_device *dev);
 static int ibmveth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
-static int ibmveth_poll(struct net_device *dev, int *budget);
+static int ibmveth_poll(struct napi_struct *napi, int budget);
 static int ibmveth_start_xmit(struct sk_buff *skb, struct net_device *dev);
 static struct net_device_stats *ibmveth_get_stats(struct net_device *dev);
 static void ibmveth_set_multicast_list(struct net_device *dev);
@@ -587,6 +587,8 @@ static int ibmveth_close(struct net_device *netdev)
 
 	ibmveth_debug_printk("close starting\n");
 
+	napi_disable(&adapter->napi);
+
 	if (!adapter->pool_config)
 		netif_stop_queue(netdev);
 
@@ -767,80 +769,68 @@ out:	spin_lock_irqsave(&adapter->stats_lock, flags);
 	return 0;
 }
 
-static int ibmveth_poll(struct net_device *netdev, int *budget)
+static int ibmveth_poll(struct napi_struct *napi, int budget)
 {
-	struct ibmveth_adapter *adapter = netdev->priv;
-	int max_frames_to_process = netdev->quota;
+	struct ibmveth_adapter *adapter = container_of(napi, struct ibmveth_adapter, napi);
+	struct net_device *netdev = adapter->netdev;
 	int frames_processed = 0;
-	int more_work = 1;
 	unsigned long lpar_rc;
 
  restart_poll:
 	do {
-		struct net_device *netdev = adapter->netdev;
-
-		if(ibmveth_rxq_pending_buffer(adapter)) {
-			struct sk_buff *skb;
+		struct sk_buff *skb;
 
-			rmb();
+		if (!ibmveth_rxq_pending_buffer(adapter))
+			break;
 
-			if(!ibmveth_rxq_buffer_valid(adapter)) {
-				wmb(); /* suggested by larson1 */
-				adapter->rx_invalid_buffer++;
-				ibmveth_debug_printk("recycling invalid buffer\n");
-				ibmveth_rxq_recycle_buffer(adapter);
-			} else {
-				int length = ibmveth_rxq_frame_length(adapter);
-				int offset = ibmveth_rxq_frame_offset(adapter);
-				skb = ibmveth_rxq_get_buffer(adapter);
+		rmb();
+		if (!ibmveth_rxq_buffer_valid(adapter)) {
+			wmb(); /* suggested by larson1 */
+			adapter->rx_invalid_buffer++;
+			ibmveth_debug_printk("recycling invalid buffer\n");
+			ibmveth_rxq_recycle_buffer(adapter);
+		} else {
+			int length = ibmveth_rxq_frame_length(adapter);
+			int offset = ibmveth_rxq_frame_offset(adapter);
+			skb = ibmveth_rxq_get_buffer(adapter);
 
-				ibmveth_rxq_harvest_buffer(adapter);
+			ibmveth_rxq_harvest_buffer(adapter);
 
-				skb_reserve(skb, offset);
-				skb_put(skb, length);
-				skb->protocol = eth_type_trans(skb, netdev);
+			skb_reserve(skb, offset);
+			skb_put(skb, length);
+			skb->protocol = eth_type_trans(skb, netdev);
 
-				netif_receive_skb(skb);	/* send it up */
+			netif_receive_skb(skb);	/* send it up */
 
-				adapter->stats.rx_packets++;
-				adapter->stats.rx_bytes += length;
-				frames_processed++;
-				netdev->last_rx = jiffies;
-			}
-		} else {
-			more_work = 0;
+			adapter->stats.rx_packets++;
+			adapter->stats.rx_bytes += length;
+			frames_processed++;
+			netdev->last_rx = jiffies;
 		}
-	} while(more_work && (frames_processed < max_frames_to_process));
+	} while (frames_processed < budget);
 
 	ibmveth_replenish_task(adapter);
 
-	if(more_work) {
-		/* more work to do - return that we are not done yet */
-		netdev->quota -= frames_processed;
-		*budget -= frames_processed;
-		return 1;
-	}
+	if (frames_processed < budget) {
+		/* We think we are done - reenable interrupts,
+		 * then check once more to make sure we are done.
+		 */
+		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
+				       VIO_IRQ_ENABLE);
 
-	/* we think we are done - reenable interrupts, then check once more to make sure we are done */
-	lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_ENABLE);
-
-	ibmveth_assert(lpar_rc == H_SUCCESS);
+		ibmveth_assert(lpar_rc == H_SUCCESS);
 
-	netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 
-	if(ibmveth_rxq_pending_buffer(adapter) && netif_rx_reschedule(netdev, frames_processed))
-	{
-		lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
-		ibmveth_assert(lpar_rc == H_SUCCESS);
-		more_work = 1;
-		goto restart_poll;
+		if (ibmveth_rxq_pending_buffer(adapter) &&
+		    netif_rx_reschedule(netdev, napi)) {
+			lpar_rc = h_vio_signal(adapter->vdev->unit_address,
+					       VIO_IRQ_DISABLE);
+			goto restart_poll;
+		}
 	}
 
-	netdev->quota -= frames_processed;
-	*budget -= frames_processed;
-
-	/* we really are done */
-	return 0;
+	return frames_processed;
 }
 
 static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
@@ -849,10 +839,11 @@ static irqreturn_t ibmveth_interrupt(int irq, void *dev_instance)
 	struct ibmveth_adapter *adapter = netdev->priv;
 	unsigned long lpar_rc;
 
-	if(netif_rx_schedule_prep(netdev)) {
-		lpar_rc = h_vio_signal(adapter->vdev->unit_address, VIO_IRQ_DISABLE);
+	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
+		lpar_rc = h_vio_signal(adapter->vdev->unit_address,
+				       VIO_IRQ_DISABLE);
 		ibmveth_assert(lpar_rc == H_SUCCESS);
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &adapter->napi);
 	}
 	return IRQ_HANDLED;
 }
@@ -1005,6 +996,8 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
 	adapter->mcastFilterSize= *mcastFilterSize_p;
 	adapter->pool_config = 0;
 
+	netif_napi_add(netdev, &adapter->napi, ibmveth_poll, 16);
+
 	/* 	Some older boxes running PHYP non-natively have an OF that
 		returns a 8-byte local-mac-address field (and the first
 		2 bytes have to be ignored) while newer boxes' OF return
@@ -1021,8 +1014,6 @@ static int __devinit ibmveth_probe(struct vio_dev *dev, const struct vio_device_
 
 	netdev->irq = dev->irq;
 	netdev->open               = ibmveth_open;
-	netdev->poll               = ibmveth_poll;
-	netdev->weight             = 16;
 	netdev->stop               = ibmveth_close;
 	netdev->hard_start_xmit    = ibmveth_start_xmit;
 	netdev->get_stats          = ibmveth_get_stats;
diff --git a/drivers/net/ibmveth.h b/drivers/net/ibmveth.h
index bb69cca..bb2e5e1 100644
--- a/drivers/net/ibmveth.h
+++ b/drivers/net/ibmveth.h
@@ -115,6 +115,7 @@ struct ibmveth_rx_q {
 struct ibmveth_adapter {
     struct vio_dev *vdev;
     struct net_device *netdev;
+    struct napi_struct napi;
     struct net_device_stats stats;
     unsigned int mcastFilterSize;
     unsigned long mac_addr;
diff --git a/drivers/net/ixgb/ixgb.h b/drivers/net/ixgb/ixgb.h
index 3569d5b..1eee889 100644
--- a/drivers/net/ixgb/ixgb.h
+++ b/drivers/net/ixgb/ixgb.h
@@ -184,6 +184,7 @@ struct ixgb_adapter {
 	boolean_t rx_csum;
 
 	/* OS defined structs */
+	struct napi_struct napi;
 	struct net_device *netdev;
 	struct pci_dev *pdev;
 	struct net_device_stats net_stats;
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index 991c883..e3f27c6 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -97,7 +97,7 @@ static irqreturn_t ixgb_intr(int irq, void *data);
 static boolean_t ixgb_clean_tx_irq(struct ixgb_adapter *adapter);
 
 #ifdef CONFIG_IXGB_NAPI
-static int ixgb_clean(struct net_device *netdev, int *budget);
+static int ixgb_clean(struct napi_struct *napi, int budget);
 static boolean_t ixgb_clean_rx_irq(struct ixgb_adapter *adapter,
 				   int *work_done, int work_to_do);
 #else
@@ -288,7 +288,7 @@ ixgb_up(struct ixgb_adapter *adapter)
 	mod_timer(&adapter->watchdog_timer, jiffies);
 
 #ifdef CONFIG_IXGB_NAPI
-	netif_poll_enable(netdev);
+	napi_enable(&adapter->napi);
 #endif
 	ixgb_irq_enable(adapter);
 
@@ -309,7 +309,7 @@ ixgb_down(struct ixgb_adapter *adapter, boolean_t kill_watchdog)
 	if(kill_watchdog)
 		del_timer_sync(&adapter->watchdog_timer);
 #ifdef CONFIG_IXGB_NAPI
-	netif_poll_disable(netdev);
+	napi_disable(&adapter->napi);
 #endif
 	adapter->link_speed = 0;
 	adapter->link_duplex = 0;
@@ -421,8 +421,7 @@ ixgb_probe(struct pci_dev *pdev,
 	netdev->tx_timeout = &ixgb_tx_timeout;
 	netdev->watchdog_timeo = 5 * HZ;
 #ifdef CONFIG_IXGB_NAPI
-	netdev->poll = &ixgb_clean;
-	netdev->weight = 64;
+	netif_napi_add(netdev, &adapter->napi, ixgb_clean, 64);
 #endif
 	netdev->vlan_rx_register = ixgb_vlan_rx_register;
 	netdev->vlan_rx_add_vid = ixgb_vlan_rx_add_vid;
@@ -1746,7 +1745,7 @@ ixgb_intr(int irq, void *data)
 	}
 
 #ifdef CONFIG_IXGB_NAPI
-	if(netif_rx_schedule_prep(netdev)) {
+	if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
 
 		/* Disable interrupts and register for poll. The flush 
 		  of the posted write is intentionally left out.
@@ -1754,7 +1753,7 @@ ixgb_intr(int irq, void *data)
 
 		atomic_inc(&adapter->irq_sem);
 		IXGB_WRITE_REG(&adapter->hw, IMC, ~0);
-		__netif_rx_schedule(netdev);
+		__netif_rx_schedule(netdev, &adapter->napi);
 	}
 #else
 	/* yes, that is actually a & and it is meant to make sure that
@@ -1776,27 +1775,23 @@ ixgb_intr(int irq, void *data)
  **/
 
 static int
-ixgb_clean(struct net_device *netdev, int *budget)
+ixgb_clean(struct napi_struct *napi, int budget)
 {
-	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	int work_to_do = min(*budget, netdev->quota);
+	struct ixgb_adapter *adapter = container_of(napi, struct ixgb_adapter, napi);
+	struct net_device *netdev = adapter->netdev;
 	int tx_cleaned;
 	int work_done = 0;
 
 	tx_cleaned = ixgb_clean_tx_irq(adapter);
-	ixgb_clean_rx_irq(adapter, &work_done, work_to_do);
-
-	*budget -= work_done;
-	netdev->quota -= work_done;
+	ixgb_clean_rx_irq(adapter, &work_done, budget);
 
 	/* if no Tx and not enough Rx work done, exit the polling mode */
 	if((!tx_cleaned && (work_done == 0)) || !netif_running(netdev)) {
-		netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 		ixgb_irq_enable(adapter);
-		return 0;
 	}
 
-	return 1;
+	return work_done;
 }
 #endif
 
diff --git a/drivers/net/ixp2000/ixpdev.c b/drivers/net/ixp2000/ixpdev.c
index d9ce1ae..5148a92 100644
--- a/drivers/net/ixp2000/ixpdev.c
+++ b/drivers/net/ixp2000/ixpdev.c
@@ -74,9 +74,9 @@ static int ixpdev_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-static int ixpdev_rx(struct net_device *dev, int *budget)
+static int ixpdev_rx(struct net_device *dev, int processed, int budget)
 {
-	while (*budget > 0) {
+	while (processed < budget) {
 		struct ixpdev_rx_desc *desc;
 		struct sk_buff *skb;
 		void *buf;
@@ -122,29 +122,34 @@ static int ixpdev_rx(struct net_device *dev, int *budget)
 
 err:
 		ixp2000_reg_write(RING_RX_PENDING, _desc);
-		dev->quota--;
-		(*budget)--;
+		processed++;
 	}
 
-	return 1;
+	return processed;
 }
 
 /* dev always points to nds[0].  */
-static int ixpdev_poll(struct net_device *dev, int *budget)
+static int ixpdev_poll(struct napi_struct *napi, int budget)
 {
+	struct ixpdev_priv *ip = container_of(napi, struct ixpdev_priv, napi);
+	struct net_device *dev = ip->dev;
+	int rx;
+
 	/* @@@ Have to stop polling when nds[0] is administratively
 	 * downed while we are polling.  */
+	rx = 0;
 	do {
 		ixp2000_reg_write(IXP2000_IRQ_THD_RAW_STATUS_A_0, 0x00ff);
 
-		if (ixpdev_rx(dev, budget))
-			return 1;
+		rx = ixpdev_rx(dev, rx, budget);
+		if (rx >= budget)
+			return rx;
 	} while (ixp2000_reg_read(IXP2000_IRQ_THD_RAW_STATUS_A_0) & 0x00ff);
 
-	netif_rx_complete(dev);
+	netif_rx_complete(dev, napi);
 	ixp2000_reg_write(IXP2000_IRQ_THD_ENABLE_SET_A_0, 0x00ff);
 
-	return 0;
+	return rx;
 }
 
 static void ixpdev_tx_complete(void)
@@ -199,9 +204,12 @@ static irqreturn_t ixpdev_interrupt(int irq, void *dev_id)
 	 * Any of the eight receive units signaled RX?
 	 */
 	if (status & 0x00ff) {
+		struct net_device *dev = nds[0];
+		struct ixpdev_priv *ip = netdev_priv(dev);
+
 		ixp2000_reg_wrb(IXP2000_IRQ_THD_ENABLE_CLEAR_A_0, 0x00ff);
-		if (likely(__netif_rx_schedule_prep(nds[0]))) {
-			__netif_rx_schedule(nds[0]);
+		if (likely(napi_schedule_prep(&ip->napi))) {
+			__netif_rx_schedule(dev, &ip->napi);
 		} else {
 			printk(KERN_CRIT "ixp2000: irq while polling!!\n");
 		}
@@ -254,6 +262,7 @@ static int ixpdev_close(struct net_device *dev)
 	struct ixpdev_priv *ip = netdev_priv(dev);
 
 	netif_stop_queue(dev);
+	napi_disable(&ip->napi);
 	set_port_admin_status(ip->channel, 0);
 
 	if (!--nds_open) {
@@ -274,7 +283,6 @@ struct net_device *ixpdev_alloc(int channel, int sizeof_priv)
 		return NULL;
 
 	dev->hard_start_xmit = ixpdev_xmit;
-	dev->poll = ixpdev_poll;
 	dev->open = ixpdev_open;
 	dev->stop = ixpdev_close;
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -282,9 +290,10 @@ struct net_device *ixpdev_alloc(int channel, int sizeof_priv)
 #endif
 
 	dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
-	dev->weight = 64;
 
 	ip = netdev_priv(dev);
+	ip->dev = dev;
+	netif_napi_add(dev, &ip->napi, ixpdev_poll, 64);
 	ip->channel = channel;
 	ip->tx_queue_entries = 0;
 
diff --git a/drivers/net/ixp2000/ixpdev.h b/drivers/net/ixp2000/ixpdev.h
index bd686cb..391ece6 100644
--- a/drivers/net/ixp2000/ixpdev.h
+++ b/drivers/net/ixp2000/ixpdev.h
@@ -14,6 +14,8 @@
 
 struct ixpdev_priv
 {
+	struct net_device *dev;
+	struct napi_struct napi;
 	int	channel;
 	int	tx_queue_entries;
 };
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index a4bb026..0bd7d21 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -470,22 +470,24 @@ static int macb_rx(struct macb *bp, int budget)
 	return received;
 }
 
-static int macb_poll(struct net_device *dev, int *budget)
+static int macb_poll(struct napi_struct *napi, int budget)
 {
-	struct macb *bp = netdev_priv(dev);
-	int orig_budget, work_done, retval = 0;
+	struct macb *bp = container_of(napi, struct macb, napi);
+	struct net_device *dev = bp->dev;
+	int work_done;
 	u32 status;
 
 	status = macb_readl(bp, RSR);
 	macb_writel(bp, RSR, status);
 
+	work_done = 0;
 	if (!status) {
 		/*
 		 * This may happen if an interrupt was pending before
 		 * this function was called last time, and no packets
 		 * have been received since.
 		 */
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		goto out;
 	}
 
@@ -496,21 +498,13 @@ static int macb_poll(struct net_device *dev, int *budget)
 		dev_warn(&bp->pdev->dev,
 			 "No RX buffers complete, status = %02lx\n",
 			 (unsigned long)status);
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		goto out;
 	}
 
-	orig_budget = *budget;
-	if (orig_budget > dev->quota)
-		orig_budget = dev->quota;
-
-	work_done = macb_rx(bp, orig_budget);
-	if (work_done < orig_budget) {
-		netif_rx_complete(dev);
-		retval = 0;
-	} else {
-		retval = 1;
-	}
+	work_done = macb_rx(bp, budget);
+	if (work_done < budget)
+		netif_rx_complete(dev, napi);
 
 	/*
 	 * We've done what we can to clean the buffers. Make sure we
@@ -521,7 +515,7 @@ out:
 
 	/* TODO: Handle errors */
 
-	return retval;
+	return work_done;
 }
 
 static irqreturn_t macb_interrupt(int irq, void *dev_id)
@@ -545,7 +539,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		}
 
 		if (status & MACB_RX_INT_FLAGS) {
-			if (netif_rx_schedule_prep(dev)) {
+			if (netif_rx_schedule_prep(dev, &bp->napi)) {
 				/*
 				 * There's no point taking any more interrupts
 				 * until we have processed the buffers
@@ -553,7 +547,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 				macb_writel(bp, IDR, MACB_RX_INT_FLAGS);
 				dev_dbg(&bp->pdev->dev,
 					"scheduling RX softirq\n");
-				__netif_rx_schedule(dev);
+				__netif_rx_schedule(dev, &bp->napi);
 			}
 		}
 
@@ -954,6 +948,7 @@ static int macb_close(struct net_device *dev)
 	unsigned long flags;
 
 	netif_stop_queue(dev);
+	napi_disable(&bp->napi);
 
 	if (bp->phy_dev)
 		phy_stop(bp->phy_dev);
@@ -1146,8 +1141,7 @@ static int __devinit macb_probe(struct platform_device *pdev)
 	dev->get_stats = macb_get_stats;
 	dev->set_multicast_list = macb_set_rx_mode;
 	dev->do_ioctl = macb_ioctl;
-	dev->poll = macb_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &bp->napi, macb_poll, 64);
 	dev->ethtool_ops = &macb_ethtool_ops;
 
 	dev->base_addr = regs->start;
diff --git a/drivers/net/macb.h b/drivers/net/macb.h
index 4e3283e..57b85ac 100644
--- a/drivers/net/macb.h
+++ b/drivers/net/macb.h
@@ -374,6 +374,7 @@ struct macb {
 	struct clk		*pclk;
 	struct clk		*hclk;
 	struct net_device	*dev;
+	struct napi_struct	napi;
 	struct net_device_stats	stats;
 	struct macb_stats	hw_stats;
 
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index 1799eee..1469a97 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -66,7 +66,7 @@ static int mv643xx_eth_change_mtu(struct net_device *, int);
 static struct net_device_stats *mv643xx_eth_get_stats(struct net_device *);
 static void eth_port_init_mac_tables(unsigned int eth_port_num);
 #ifdef MV643XX_NAPI
-static int mv643xx_poll(struct net_device *dev, int *budget);
+static int mv643xx_poll(struct napi_struct *napi, int budget);
 #endif
 static int ethernet_phy_get(unsigned int eth_port_num);
 static void ethernet_phy_set(unsigned int eth_port_num, int phy_addr);
@@ -562,7 +562,7 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id)
 		/* wait for previous write to complete */
 		mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num));
 
-		netif_rx_schedule(dev);
+		netif_rx_schedule(dev, &mp->napi);
 	}
 #else
 	if (eth_int_cause & ETH_INT_CAUSE_RX)
@@ -982,7 +982,7 @@ static int mv643xx_eth_stop(struct net_device *dev)
 	mv_read(MV643XX_ETH_INTERRUPT_MASK_REG(port_num));
 
 #ifdef MV643XX_NAPI
-	netif_poll_disable(dev);
+	napi_disable(&mp->napi);
 #endif
 	netif_carrier_off(dev);
 	netif_stop_queue(dev);
@@ -992,10 +992,6 @@ static int mv643xx_eth_stop(struct net_device *dev)
 	mv643xx_eth_free_tx_rings(dev);
 	mv643xx_eth_free_rx_rings(dev);
 
-#ifdef MV643XX_NAPI
-	netif_poll_enable(dev);
-#endif
-
 	free_irq(dev->irq, dev);
 
 	return 0;
@@ -1007,11 +1003,12 @@ static int mv643xx_eth_stop(struct net_device *dev)
  *
  * This function is used in case of NAPI
  */
-static int mv643xx_poll(struct net_device *dev, int *budget)
+static int mv643xx_poll(struct napi_struct *napi, int budget)
 {
-	struct mv643xx_private *mp = netdev_priv(dev);
-	int done = 1, orig_budget, work_done;
+	struct mv643xx_private *mp = container_of(napi, struct mv643xx_private, napi);
+	struct net_device *dev = mp->dev;
 	unsigned int port_num = mp->port_num;
+	int work_done;
 
 #ifdef MV643XX_TX_FAST_REFILL
 	if (++mp->tx_clean_threshold > 5) {
@@ -1020,27 +1017,20 @@ static int mv643xx_poll(struct net_device *dev, int *budget)
 	}
 #endif
 
+	work_done = 0;
 	if ((mv_read(MV643XX_ETH_RX_CURRENT_QUEUE_DESC_PTR_0(port_num)))
-						!= (u32) mp->rx_used_desc_q) {
-		orig_budget = *budget;
-		if (orig_budget > dev->quota)
-			orig_budget = dev->quota;
-		work_done = mv643xx_eth_receive_queue(dev, orig_budget);
-		*budget -= work_done;
-		dev->quota -= work_done;
-		if (work_done >= orig_budget)
-			done = 0;
-	}
+	    != (u32) mp->rx_used_desc_q)
+		work_done = mv643xx_eth_receive_queue(dev, budget);
 
-	if (done) {
-		netif_rx_complete(dev);
+	if (work_done < budget) {
+		netif_rx_complete(dev, napi);
 		mv_write(MV643XX_ETH_INTERRUPT_CAUSE_REG(port_num), 0);
 		mv_write(MV643XX_ETH_INTERRUPT_CAUSE_EXTEND_REG(port_num), 0);
 		mv_write(MV643XX_ETH_INTERRUPT_MASK_REG(port_num),
 						ETH_INT_UNMASK_ALL);
 	}
 
-	return done ? 0 : 1;
+	return work_done;
 }
 #endif
 
@@ -1333,6 +1323,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, dev);
 
 	mp = netdev_priv(dev);
+	mp->dev = dev;
+#ifdef MV643XX_NAPI
+	netif_napi_add(dev, &mp->napi, mv643xx_poll, 64);
+#endif
 
 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
 	BUG_ON(!res);
@@ -1347,10 +1341,6 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 
 	/* No need to Tx Timeout */
 	dev->tx_timeout = mv643xx_eth_tx_timeout;
-#ifdef MV643XX_NAPI
-	dev->poll = mv643xx_poll;
-	dev->weight = 64;
-#endif
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = mv643xx_netpoll;
diff --git a/drivers/net/mv643xx_eth.h b/drivers/net/mv643xx_eth.h
index 82f8c0c..6f2aaa6 100644
--- a/drivers/net/mv643xx_eth.h
+++ b/drivers/net/mv643xx_eth.h
@@ -318,6 +318,8 @@ struct mv643xx_private {
 
 	struct work_struct tx_timeout_task;
 
+	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 	struct mv643xx_mib_counters mib_counters;
 	spinlock_t lock;
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index deca653..3cd7921 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -163,6 +163,7 @@ struct myri10ge_priv {
 	int small_bytes;
 	int big_bytes;
 	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 	u8 __iomem *sram;
 	int sram_size;
@@ -1099,7 +1100,7 @@ static inline void myri10ge_tx_done(struct myri10ge_priv *mgp, int mcp_index)
 	}
 }
 
-static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit)
+static inline int myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int budget)
 {
 	struct myri10ge_rx_done *rx_done = &mgp->rx_done;
 	unsigned long rx_bytes = 0;
@@ -1108,10 +1109,11 @@ static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit)
 
 	int idx = rx_done->idx;
 	int cnt = rx_done->cnt;
+	int work_done = 0;
 	u16 length;
 	__wsum checksum;
 
-	while (rx_done->entry[idx].length != 0 && *limit != 0) {
+	for (; rx_done->entry[idx].length != 0 && work_done < budget; work_done++) {
 		length = ntohs(rx_done->entry[idx].length);
 		rx_done->entry[idx].length = 0;
 		checksum = csum_unfold(rx_done->entry[idx].checksum);
@@ -1127,10 +1129,6 @@ static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit)
 		rx_bytes += rx_ok * (unsigned long)length;
 		cnt++;
 		idx = cnt & (myri10ge_max_intr_slots - 1);
-
-		/* limit potential for livelock by only handling a
-		 * limited number of frames. */
-		(*limit)--;
 	}
 	rx_done->idx = idx;
 	rx_done->cnt = cnt;
@@ -1144,6 +1142,7 @@ static inline void myri10ge_clean_rx_done(struct myri10ge_priv *mgp, int *limit)
 	if (mgp->rx_big.fill_cnt - mgp->rx_big.cnt < myri10ge_fill_thresh)
 		myri10ge_alloc_rx_pages(mgp, &mgp->rx_big, mgp->big_bytes, 0);
 
+	return work_done;
 }
 
 static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
@@ -1188,26 +1187,21 @@ static inline void myri10ge_check_statblock(struct myri10ge_priv *mgp)
 	}
 }
 
-static int myri10ge_poll(struct net_device *netdev, int *budget)
+static int myri10ge_poll(struct napi_struct *napi, int budget)
 {
-	struct myri10ge_priv *mgp = netdev_priv(netdev);
+	struct myri10ge_priv *mgp = container_of(napi, struct myri10ge_priv, napi);
+	struct net_device *netdev = mgp->dev;
 	struct myri10ge_rx_done *rx_done = &mgp->rx_done;
-	int limit, orig_limit, work_done;
+	int work_done;
 
 	/* process as many rx events as NAPI will allow */
-	limit = min(*budget, netdev->quota);
-	orig_limit = limit;
-	myri10ge_clean_rx_done(mgp, &limit);
-	work_done = orig_limit - limit;
-	*budget -= work_done;
-	netdev->quota -= work_done;
+	work_done = myri10ge_clean_rx_done(mgp, budget);
 
 	if (rx_done->entry[rx_done->idx].length == 0 || !netif_running(netdev)) {
-		netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 		put_be32(htonl(3), mgp->irq_claim);
-		return 0;
 	}
-	return 1;
+	return work_done;
 }
 
 static irqreturn_t myri10ge_intr(int irq, void *arg)
@@ -1225,7 +1219,7 @@ static irqreturn_t myri10ge_intr(int irq, void *arg)
 	/* low bit indicates receives are present, so schedule
 	 * napi poll handler */
 	if (stats->valid & 1)
-		netif_rx_schedule(mgp->dev);
+		netif_rx_schedule(mgp->dev, &mgp->napi);
 
 	if (!mgp->msi_enabled) {
 		put_be32(0, mgp->irq_deassert);
@@ -1852,7 +1846,7 @@ static int myri10ge_open(struct net_device *dev)
 	mgp->link_state = htonl(~0U);
 	mgp->rdma_tags_available = 15;
 
-	netif_poll_enable(mgp->dev);	/* must happen prior to any irq */
+	napi_enable(&mgp->napi);	/* must happen prior to any irq */
 
 	status = myri10ge_send_cmd(mgp, MXGEFW_CMD_ETHERNET_UP, &cmd, 0);
 	if (status) {
@@ -1896,7 +1890,7 @@ static int myri10ge_close(struct net_device *dev)
 
 	del_timer_sync(&mgp->watchdog_timer);
 	mgp->running = MYRI10GE_ETH_STOPPING;
-	netif_poll_disable(mgp->dev);
+	napi_disable(&mgp->napi);
 	netif_carrier_off(dev);
 	netif_stop_queue(dev);
 	old_down_cnt = mgp->down_cnt;
@@ -2854,6 +2848,8 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mgp = netdev_priv(netdev);
 	memset(mgp, 0, sizeof(*mgp));
 	mgp->dev = netdev;
+	netif_napi_add(netdev, &mgp->napi,
+		       myri10ge_poll, myri10ge_napi_weight);
 	mgp->pdev = pdev;
 	mgp->csum_flag = MXGEFW_FLAGS_CKSUM;
 	mgp->pause = myri10ge_flow_control;
@@ -2990,8 +2986,6 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO;
 	if (dac_enabled)
 		netdev->features |= NETIF_F_HIGHDMA;
-	netdev->poll = myri10ge_poll;
-	netdev->weight = myri10ge_napi_weight;
 
 	/* make sure we can get an irq, and that MSI can be
 	 * setup (if available).  Also ensure netdev->irq
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index 6bb48ba..df978f5 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -560,6 +560,8 @@ struct netdev_private {
 	/* address of a sent-in-place packet/buffer, for later free() */
 	struct sk_buff *tx_skbuff[TX_RING_SIZE];
 	dma_addr_t tx_dma[TX_RING_SIZE];
+	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 	/* Media monitoring timer */
 	struct timer_list timer;
@@ -636,7 +638,7 @@ static void init_registers(struct net_device *dev);
 static int start_tx(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t intr_handler(int irq, void *dev_instance);
 static void netdev_error(struct net_device *dev, int intr_status);
-static int natsemi_poll(struct net_device *dev, int *budget);
+static int natsemi_poll(struct napi_struct *napi, int budget);
 static void netdev_rx(struct net_device *dev, int *work_done, int work_to_do);
 static void netdev_tx_done(struct net_device *dev);
 static int natsemi_change_mtu(struct net_device *dev, int new_mtu);
@@ -861,6 +863,8 @@ static int __devinit natsemi_probe1 (struct pci_dev *pdev,
 	dev->irq = irq;
 
 	np = netdev_priv(dev);
+	np->dev = dev;
+	netif_napi_add(dev, &np->napi, natsemi_poll, 64);
 
 	np->pci_dev = pdev;
 	pci_set_drvdata(pdev, dev);
@@ -931,8 +934,6 @@ static int __devinit natsemi_probe1 (struct pci_dev *pdev,
 	dev->do_ioctl = &netdev_ioctl;
 	dev->tx_timeout = &tx_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	dev->poll = natsemi_poll;
-	dev->weight = 64;
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = &natsemi_poll_controller;
@@ -2200,10 +2201,10 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 
 	prefetch(&np->rx_skbuff[np->cur_rx % RX_RING_SIZE]);
 
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &np->napi)) {
 		/* Disable interrupts and register for poll */
 		natsemi_irq_disable(dev);
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &np->napi);
 	} else
 		printk(KERN_WARNING
 	       	       "%s: Ignoring interrupt, status %#08x, mask %#08x.\n",
@@ -2216,12 +2217,11 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 /* This is the NAPI poll routine.  As well as the standard RX handling
  * it also handles all other interrupts that the chip might raise.
  */
-static int natsemi_poll(struct net_device *dev, int *budget)
+static int natsemi_poll(struct napi_struct *napi, int budget)
 {
-	struct netdev_private *np = netdev_priv(dev);
+	struct netdev_private *np = container_of(napi, struct netdev_private, napi);
+	struct net_device *dev = np->dev;
 	void __iomem * ioaddr = ns_ioaddr(dev);
-
-	int work_to_do = min(*budget, dev->quota);
 	int work_done = 0;
 
 	do {
@@ -2236,7 +2236,7 @@ static int natsemi_poll(struct net_device *dev, int *budget)
 		if (np->intr_status &
 		    (IntrRxDone | IntrRxIntr | RxStatusFIFOOver |
 		     IntrRxErr | IntrRxOverrun)) {
-			netdev_rx(dev, &work_done, work_to_do);
+			netdev_rx(dev, &work_done, budget);
 		}
 
 		if (np->intr_status &
@@ -2250,16 +2250,13 @@ static int natsemi_poll(struct net_device *dev, int *budget)
 		if (np->intr_status & IntrAbnormalSummary)
 			netdev_error(dev, np->intr_status);
 
-		*budget -= work_done;
-		dev->quota -= work_done;
-
-		if (work_done >= work_to_do)
-			return 1;
+		if (work_done >= budget)
+			return work_done;
 
 		np->intr_status = readl(ioaddr + IntrStatus);
 	} while (np->intr_status);
 
-	netif_rx_complete(dev);
+	netif_rx_complete(dev, napi);
 
 	/* Reenable interrupts providing nothing is trying to shut
 	 * the chip down. */
@@ -2268,7 +2265,7 @@ static int natsemi_poll(struct net_device *dev, int *budget)
 		natsemi_irq_enable(dev);
 	spin_unlock(&np->lock);
 
-	return 0;
+	return work_done;
 }
 
 /* This routine is logically part of the interrupt handler, but separated
@@ -3155,6 +3152,8 @@ static int netdev_close(struct net_device *dev)
 			dev->name, np->cur_tx, np->dirty_tx,
 			np->cur_rx, np->dirty_rx);
 
+	napi_disable(&np->napi);
+
 	/*
 	 * FIXME: what if someone tries to close a device
 	 * that is suspended?
diff --git a/drivers/net/netxen/netxen_nic.h b/drivers/net/netxen/netxen_nic.h
index d4c92cc..aaa3493 100644
--- a/drivers/net/netxen/netxen_nic.h
+++ b/drivers/net/netxen/netxen_nic.h
@@ -880,6 +880,7 @@ struct netxen_adapter {
 	struct netxen_adapter *master;
 	struct net_device *netdev;
 	struct pci_dev *pdev;
+	struct napi_struct napi;
 	struct net_device_stats net_stats;
 	unsigned char mac_addr[ETH_ALEN];
 	int mtu;
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index 08a62ac..61e3d32 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -68,7 +68,7 @@ static void netxen_tx_timeout(struct net_device *netdev);
 static void netxen_tx_timeout_task(struct work_struct *work);
 static void netxen_watchdog(unsigned long);
 static int netxen_handle_int(struct netxen_adapter *, struct net_device *);
-static int netxen_nic_poll(struct net_device *dev, int *budget);
+static int netxen_nic_poll(struct napi_struct *napi, int budget);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void netxen_nic_poll_controller(struct net_device *netdev);
 #endif
@@ -402,6 +402,9 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->netdev  = netdev;
 	adapter->pdev    = pdev;
 
+	netif_napi_add(netdev, &adapter->napi,
+		       netxen_nic_poll, NETXEN_NETDEV_WEIGHT);
+
 	/* this will be read from FW later */
 	adapter->intr_scheme = -1;
 
@@ -422,8 +425,6 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netxen_nic_change_mtu(netdev, netdev->mtu);
 
 	SET_ETHTOOL_OPS(netdev, &netxen_nic_ethtool_ops);
-	netdev->poll = netxen_nic_poll;
-	netdev->weight = NETXEN_NETDEV_WEIGHT;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	netdev->poll_controller = netxen_nic_poll_controller;
 #endif
@@ -928,6 +929,7 @@ static int netxen_nic_close(struct net_device *netdev)
 
 	netif_carrier_off(netdev);
 	netif_stop_queue(netdev);
+	napi_disable(&adapter->napi);
 
 	netxen_nic_disable_int(adapter);
 
@@ -1248,11 +1250,11 @@ netxen_handle_int(struct netxen_adapter *adapter, struct net_device *netdev)
 	netxen_nic_disable_int(adapter);
 
 	if (netxen_nic_rx_has_work(adapter) || netxen_nic_tx_has_work(adapter)) {
-		if (netif_rx_schedule_prep(netdev)) {
+		if (netif_rx_schedule_prep(netdev, &adapter->napi)) {
 			/*
 			 * Interrupts are already disabled.
 			 */
-			__netif_rx_schedule(netdev);
+			__netif_rx_schedule(netdev, &adapter->napi);
 		} else {
 			static unsigned int intcount = 0;
 			if ((++intcount & 0xfff) == 0xfff)
@@ -1310,14 +1312,13 @@ irqreturn_t netxen_intr(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int netxen_nic_poll(struct net_device *netdev, int *budget)
+static int netxen_nic_poll(struct napi_struct *napi, int budget)
 {
-	struct netxen_adapter *adapter = netdev_priv(netdev);
-	int work_to_do = min(*budget, netdev->quota);
+	struct netxen_adapter *adapter = container_of(napi, struct netxen_adapter, napi);
+	struct net_device *netdev = adapter->netdev;
 	int done = 1;
 	int ctx;
-	int this_work_done;
-	int work_done = 0;
+	int work_done = 0;
 
-	DPRINTK(INFO, "polling for %d descriptors\n", *budget);
+	DPRINTK(INFO, "polling for %d descriptors\n", budget);
 
@@ -1335,16 +1336,11 @@ static int netxen_nic_poll(struct net_device *netdev, int *budget)
 		 * packets are on one context, it gets only half of the quota,
 		 * and ends up not processing it.
 		 */
-		this_work_done = netxen_process_rcv_ring(adapter, ctx,
-							 work_to_do /
-							 MAX_RCV_CTX);
-		work_done += this_work_done;
+		work_done += netxen_process_rcv_ring(adapter, ctx,
+						     budget / MAX_RCV_CTX);
 	}
 
-	netdev->quota -= work_done;
-	*budget -= work_done;
-
-	if (work_done >= work_to_do && netxen_nic_rx_has_work(adapter) != 0)
+	if (work_done >= budget && netxen_nic_rx_has_work(adapter) != 0)
 		done = 0;
 
 	if (netxen_process_cmd_ring((unsigned long)adapter) == 0)
@@ -1353,11 +1349,11 @@ static int netxen_nic_poll(struct net_device *netdev, int *budget)
 	DPRINTK(INFO, "new work_done: %d work_to_do: %d\n",
 		work_done, work_to_do);
 	if (done) {
-		netif_rx_complete(netdev);
+		netif_rx_complete(netdev, napi);
 		netxen_nic_enable_int(adapter);
 	}
 
-	return !done;
+	return work_done;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/pasemi_mac.c b/drivers/net/pasemi_mac.c
index 0b3066a..c9d148d 100644
--- a/drivers/net/pasemi_mac.c
+++ b/drivers/net/pasemi_mac.c
@@ -584,7 +584,7 @@ static irqreturn_t pasemi_mac_rx_intr(int irq, void *data)
 	if (*mac->rx_status & PAS_STATUS_TIMER)
 		reg |= PAS_IOB_DMA_RXCH_RESET_TINTC;
 
-	netif_rx_schedule(dev);
+	netif_rx_schedule(dev, &mac->napi);
 
 	pci_write_config_dword(mac->iob_pdev,
 			       PAS_IOB_DMA_RXCH_RESET(mac->dma_rxch), reg);
@@ -808,7 +808,7 @@ static int pasemi_mac_open(struct net_device *dev)
 		dev_warn(&mac->pdev->dev, "phy init failed: %d\n", ret);
 
 	netif_start_queue(dev);
-	netif_poll_enable(dev);
+	napi_enable(&mac->napi);
 
 	/* Interrupts are a bit different for our DMA controller: While
 	 * it's got one a regular PCI device header, the interrupt there
@@ -869,6 +869,7 @@ static int pasemi_mac_close(struct net_device *dev)
 	}
 
 	netif_stop_queue(dev);
+	napi_disable(&mac->napi);
 
 	/* Clean out any pending buffers */
 	pasemi_mac_clean_tx(mac);
@@ -1047,26 +1048,20 @@ static void pasemi_mac_set_rx_mode(struct net_device *dev)
 }
 
 
-static int pasemi_mac_poll(struct net_device *dev, int *budget)
+static int pasemi_mac_poll(struct napi_struct *napi, int budget)
 {
-	int pkts, limit = min(*budget, dev->quota);
-	struct pasemi_mac *mac = netdev_priv(dev);
-
-	pkts = pasemi_mac_clean_rx(mac, limit);
+	struct pasemi_mac *mac = container_of(napi, struct pasemi_mac, napi);
+	struct net_device *dev = mac->netdev;
+	int pkts;
 
-	dev->quota -= pkts;
-	*budget -= pkts;
-
-	if (pkts < limit) {
+	pkts = pasemi_mac_clean_rx(mac, budget);
+	if (pkts < budget) {
 		/* all done, no more packets present */
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 
 		pasemi_mac_restart_rx_intr(mac);
-		return 0;
-	} else {
-		/* used up our quantum, so reschedule */
-		return 1;
 	}
+	return pkts;
 }
 
 static int __devinit
@@ -1099,6 +1094,10 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mac->netdev = dev;
 	mac->dma_pdev = pci_get_device(PCI_VENDOR_ID_PASEMI, 0xa007, NULL);
 
+	netif_napi_add(dev, &mac->napi, pasemi_mac_poll, 64);
+
+	dev->features = NETIF_F_HW_CSUM;
+
 	if (!mac->dma_pdev) {
 		dev_err(&pdev->dev, "Can't find DMA Controller\n");
 		err = -ENODEV;
@@ -1150,9 +1149,6 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->hard_start_xmit = pasemi_mac_start_tx;
 	dev->get_stats = pasemi_mac_get_stats;
 	dev->set_multicast_list = pasemi_mac_set_rx_mode;
-	dev->weight = 64;
-	dev->poll = pasemi_mac_poll;
-	dev->features = NETIF_F_HW_CSUM;
 
 	/* The dma status structure is located in the I/O bridge, and
 	 * is cache coherent.
diff --git a/drivers/net/pasemi_mac.h b/drivers/net/pasemi_mac.h
index c29ee15..85d3b78 100644
--- a/drivers/net/pasemi_mac.h
+++ b/drivers/net/pasemi_mac.h
@@ -56,6 +56,7 @@ struct pasemi_mac {
 	struct pci_dev *dma_pdev;
 	struct pci_dev *iob_pdev;
 	struct phy_device *phydev;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 
 	/* Pointer to the cacheable per-channel status registers */
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index e6a6753..b76053e 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -280,6 +280,8 @@ struct pcnet32_private {
 	unsigned int		dirty_rx,	/* ring entries to be freed. */
 				dirty_tx;
 
+	struct net_device	*dev;
+	struct napi_struct	napi;
 	struct net_device_stats	stats;
 	char			tx_full;
 	char			phycount;	/* number of phys found */
@@ -440,15 +442,17 @@ static struct pcnet32_access pcnet32_dwio = {
 
 static void pcnet32_netif_stop(struct net_device *dev)
 {
+	struct pcnet32_private *lp = netdev_priv(dev);
 	dev->trans_start = jiffies;
-	netif_poll_disable(dev);
+	napi_disable(&lp->napi);
 	netif_tx_disable(dev);
 }
 
 static void pcnet32_netif_start(struct net_device *dev)
 {
+	struct pcnet32_private *lp = netdev_priv(dev);
 	netif_wake_queue(dev);
-	netif_poll_enable(dev);
+	napi_enable(&lp->napi);
 }
 
 /*
@@ -816,7 +820,7 @@ static int pcnet32_set_ringparam(struct net_device *dev,
 	if ((1 << i) != lp->rx_ring_size)
 		pcnet32_realloc_rx_ring(dev, lp, i);
 
-	dev->weight = lp->rx_ring_size / 2;
+	lp->napi.weight = lp->rx_ring_size / 2;
 
 	if (netif_running(dev)) {
 		pcnet32_netif_start(dev);
@@ -1255,7 +1259,7 @@ static void pcnet32_rx_entry(struct net_device *dev,
 	return;
 }
 
-static int pcnet32_rx(struct net_device *dev, int quota)
+static int pcnet32_rx(struct net_device *dev, int budget)
 {
 	struct pcnet32_private *lp = netdev_priv(dev);
 	int entry = lp->cur_rx & lp->rx_mod_mask;
@@ -1263,7 +1267,7 @@ static int pcnet32_rx(struct net_device *dev, int quota)
 	int npackets = 0;
 
 	/* If we own the next entry, it's a new packet. Send it up. */
-	while (quota > npackets && (short)le16_to_cpu(rxp->status) >= 0) {
+	while (npackets < budget && (short)le16_to_cpu(rxp->status) >= 0) {
 		pcnet32_rx_entry(dev, lp, rxp, entry);
 		npackets += 1;
 		/*
@@ -1379,15 +1383,16 @@ static int pcnet32_tx(struct net_device *dev)
 }
 
 #ifdef CONFIG_PCNET32_NAPI
-static int pcnet32_poll(struct net_device *dev, int *budget)
+static int pcnet32_poll(struct napi_struct *napi, int budget)
 {
-	struct pcnet32_private *lp = netdev_priv(dev);
-	int quota = min(dev->quota, *budget);
+	struct pcnet32_private *lp = container_of(napi, struct pcnet32_private, napi);
+	struct net_device *dev = lp->dev;
 	unsigned long ioaddr = dev->base_addr;
 	unsigned long flags;
+	int work_done;
 	u16 val;
 
-	quota = pcnet32_rx(dev, quota);
+	work_done = pcnet32_rx(dev, budget);
 
 	spin_lock_irqsave(&lp->lock, flags);
 	if (pcnet32_tx(dev)) {
@@ -1399,28 +1404,22 @@ static int pcnet32_poll(struct net_device *dev, int *budget)
 	}
 	spin_unlock_irqrestore(&lp->lock, flags);
 
-	*budget -= quota;
-	dev->quota -= quota;
-
-	if (dev->quota == 0) {
-		return 1;
-	}
-
-	netif_rx_complete(dev);
-
-	spin_lock_irqsave(&lp->lock, flags);
+	if (work_done < budget) {
+		spin_lock_irqsave(&lp->lock, flags);
 
-	/* clear interrupt masks */
-	val = lp->a.read_csr(ioaddr, CSR3);
-	val &= 0x00ff;
-	lp->a.write_csr(ioaddr, CSR3, val);
+		__netif_rx_complete(dev, napi);
 
-	/* Set interrupt enable. */
-	lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN);
-	mmiowb();
-	spin_unlock_irqrestore(&lp->lock, flags);
+		/* clear interrupt masks */
+		val = lp->a.read_csr(ioaddr, CSR3);
+		val &= 0x00ff;
+		lp->a.write_csr(ioaddr, CSR3, val);
 
-	return 0;
+		/* Set interrupt enable. */
+		lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN);
+		mmiowb();
+		spin_unlock_irqrestore(&lp->lock, flags);
+	}
+	return work_done;
 }
 #endif
 
@@ -1815,6 +1814,8 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 	}
 	lp->pci_dev = pdev;
 
+	lp->dev = dev;
+
 	spin_lock_init(&lp->lock);
 
 	SET_MODULE_OWNER(dev);
@@ -1843,6 +1844,10 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 	lp->mii_if.mdio_read = mdio_read;
 	lp->mii_if.mdio_write = mdio_write;
 
+#ifdef CONFIG_PCNET32_NAPI
+	netif_napi_add(dev, &lp->napi, pcnet32_poll, lp->rx_ring_size / 2);
+#endif
+
 	if (fdx && !(lp->options & PCNET32_PORT_ASEL) &&
 	    ((cards_found >= MAX_UNITS) || full_duplex[cards_found]))
 		lp->options |= PCNET32_PORT_FD;
@@ -1953,10 +1958,6 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
 	dev->ethtool_ops = &pcnet32_ethtool_ops;
 	dev->tx_timeout = pcnet32_tx_timeout;
 	dev->watchdog_timeo = (5 * HZ);
-	dev->weight = lp->rx_ring_size / 2;
-#ifdef CONFIG_PCNET32_NAPI
-	dev->poll = pcnet32_poll;
-#endif
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = pcnet32_poll_controller;
@@ -2599,18 +2600,18 @@ pcnet32_interrupt(int irq, void *dev_id)
 			/* unlike for the lance, there is no restart needed */
 		}
 #ifdef CONFIG_PCNET32_NAPI
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &lp->napi)) {
 			u16 val;
 			/* set interrupt masks */
 			val = lp->a.read_csr(ioaddr, CSR3);
 			val |= 0x5f00;
 			lp->a.write_csr(ioaddr, CSR3, val);
 			mmiowb();
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &lp->napi);
 			break;
 		}
 #else
-		pcnet32_rx(dev, dev->weight);
+		pcnet32_rx(dev, lp->napi.weight);
 		if (pcnet32_tx(dev)) {
 			/* reset the chip to clear the error condition, then restart */
 			lp->a.reset(ioaddr);
@@ -2645,6 +2646,7 @@ static int pcnet32_close(struct net_device *dev)
 	del_timer_sync(&lp->watchdog_timer);
 
 	netif_stop_queue(dev);
+	napi_disable(&lp->napi);
 
 	spin_lock_irqsave(&lp->lock, flags);
 
diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c
index 13d1c0a..568c62b 100644
--- a/drivers/net/ps3_gelic_net.c
+++ b/drivers/net/ps3_gelic_net.c
@@ -556,7 +556,7 @@ static int gelic_net_stop(struct net_device *netdev)
 {
 	struct gelic_net_card *card = netdev_priv(netdev);
 
-	netif_poll_disable(netdev);
+	napi_disable(&card->napi);
 	netif_stop_queue(netdev);
 
 	/* turn off DMA, force end */
@@ -988,32 +988,24 @@ refill:
  * if the quota is exceeded, but the driver has still packets.
  *
  */
-static int gelic_net_poll(struct net_device *netdev, int *budget)
+static int gelic_net_poll(struct napi_struct *napi, int budget)
 {
-	struct gelic_net_card *card = netdev_priv(netdev);
-	int packets_to_do, packets_done = 0;
-	int no_more_packets = 0;
-
-	packets_to_do = min(*budget, netdev->quota);
+	struct gelic_net_card *card = container_of(napi, struct gelic_net_card, napi);
+	struct net_device *netdev = card->netdev;
+	int packets_done = 0;
 
-	while (packets_to_do) {
-		if (gelic_net_decode_one_descr(card)) {
-			packets_done++;
-			packets_to_do--;
-		} else {
-			/* no more packets for the stack */
-			no_more_packets = 1;
+	while (packets_done < budget) {
+		if (!gelic_net_decode_one_descr(card))
 			break;
-		}
+
+		packets_done++;
 	}
-	netdev->quota -= packets_done;
-	*budget -= packets_done;
-	if (no_more_packets) {
-		netif_rx_complete(netdev);
+
+	if (packets_done < budget) {
+		netif_rx_complete(netdev, napi);
 		gelic_net_rx_irq_on(card);
-		return 0;
-	} else
-		return 1;
+	}
+	return packets_done;
 }
 /**
  * gelic_net_change_mtu - changes the MTU of an interface
@@ -1056,7 +1048,7 @@ static irqreturn_t gelic_net_interrupt(int irq, void *ptr)
 
 	if (status & GELIC_NET_RXINT) {
 		gelic_net_rx_irq_off(card);
-		netif_rx_schedule(netdev);
+		netif_rx_schedule(netdev, &card->napi);
 	}
 
 	if (status & GELIC_NET_TXINT) {
@@ -1361,9 +1353,6 @@ static void gelic_net_setup_netdev_ops(struct net_device *netdev)
 	/* tx watchdog */
 	netdev->tx_timeout = &gelic_net_tx_timeout;
 	netdev->watchdog_timeo = GELIC_NET_WATCHDOG_TIMEOUT;
-	/* NAPI */
-	netdev->poll = &gelic_net_poll;
-	netdev->weight = GELIC_NET_NAPI_WEIGHT;
 	netdev->ethtool_ops = &gelic_net_ethtool_ops;
 }
 
@@ -1391,6 +1380,9 @@ static int gelic_net_setup_netdev(struct gelic_net_card *card)
 
 	gelic_net_setup_netdev_ops(netdev);
 
+	netif_napi_add(netdev, &card->napi,
+		       gelic_net_poll, GELIC_NET_NAPI_WEIGHT);
+
 	netdev->features = NETIF_F_IP_CSUM;
 
 	status = lv1_net_control(bus_id(card), dev_id(card),
diff --git a/drivers/net/ps3_gelic_net.h b/drivers/net/ps3_gelic_net.h
index a9c4c4f..9685602 100644
--- a/drivers/net/ps3_gelic_net.h
+++ b/drivers/net/ps3_gelic_net.h
@@ -194,6 +194,7 @@ struct gelic_net_descr_chain {
 
 struct gelic_net_card {
 	struct net_device *netdev;
+	struct napi_struct napi;
 	/*
 	 * hypervisor requires irq_status should be
 	 * 8 bytes aligned, but u64 member is
diff --git a/drivers/net/qla3xxx.c b/drivers/net/qla3xxx.c
index 69da95b..3287b69 100755
--- a/drivers/net/qla3xxx.c
+++ b/drivers/net/qla3xxx.c
@@ -2303,10 +2303,10 @@ static int ql_tx_rx_clean(struct ql3_adapter *qdev,
 	return work_done;
 }
 
-static int ql_poll(struct net_device *ndev, int *budget)
+static int ql_poll(struct napi_struct *napi, int budget)
 {
-	struct ql3_adapter *qdev = netdev_priv(ndev);
-	int work_to_do = min(*budget, ndev->quota);
+	struct ql3_adapter *qdev = container_of(napi, struct ql3_adapter, napi);
+	struct net_device *ndev = qdev->ndev;
 	int rx_cleaned = 0, tx_cleaned = 0;
 	unsigned long hw_flags;
 	struct ql3xxx_port_registers __iomem *port_regs = qdev->mem_map_registers;
@@ -2314,16 +2314,13 @@ static int ql_poll(struct net_device *ndev, int *budget)
 	if (!netif_carrier_ok(ndev))
 		goto quit_polling;
 
-	ql_tx_rx_clean(qdev, &tx_cleaned, &rx_cleaned, work_to_do);
-	*budget -= rx_cleaned;
-	ndev->quota -= rx_cleaned;
+	ql_tx_rx_clean(qdev, &tx_cleaned, &rx_cleaned, budget);
 
-	if( tx_cleaned + rx_cleaned != work_to_do ||
+	if (tx_cleaned + rx_cleaned != budget ||
 	    !netif_running(ndev)) {
 quit_polling:
-		netif_rx_complete(ndev);
-
 		spin_lock_irqsave(&qdev->hw_lock, hw_flags);
+		__netif_rx_complete(ndev, napi);
 		ql_update_small_bufq_prod_index(qdev);
 		ql_update_lrg_bufq_prod_index(qdev);
 		writel(qdev->rsp_consumer_index,
@@ -2331,9 +2328,8 @@ quit_polling:
 		spin_unlock_irqrestore(&qdev->hw_lock, hw_flags);
 
 		ql_enable_interrupts(qdev);
-		return 0;
 	}
-	return 1;
+	return tx_cleaned + rx_cleaned;
 }
 
 static irqreturn_t ql3xxx_isr(int irq, void *dev_id)
@@ -2383,8 +2379,8 @@ static irqreturn_t ql3xxx_isr(int irq, void *dev_id)
 		spin_unlock(&qdev->adapter_lock);
 	} else if (value & ISP_IMR_DISABLE_CMPL_INT) {
 		ql_disable_interrupts(qdev);
-		if (likely(netif_rx_schedule_prep(ndev))) {
-			__netif_rx_schedule(ndev);
+		if (likely(netif_rx_schedule_prep(ndev, &qdev->napi))) {
+			__netif_rx_schedule(ndev, &qdev->napi);
 		}
 	} else {
 		return IRQ_NONE;
@@ -3610,7 +3606,7 @@ static int ql_adapter_down(struct ql3_adapter *qdev, int do_reset)
 
 	del_timer_sync(&qdev->adapter_timer);
 
-	netif_poll_disable(ndev);
+	napi_disable(&qdev->napi);
 
 	if (do_reset) {
 		int soft_reset;
@@ -3698,7 +3694,7 @@ static int ql_adapter_up(struct ql3_adapter *qdev)
 
 	mod_timer(&qdev->adapter_timer, jiffies + HZ * 1);
 
-	netif_poll_enable(ndev);
+	napi_enable(&qdev->napi);
 	ql_enable_interrupts(qdev);
 	return 0;
 
@@ -4054,8 +4050,7 @@ static int __devinit ql3xxx_probe(struct pci_dev *pdev,
 	ndev->tx_timeout = ql3xxx_tx_timeout;
 	ndev->watchdog_timeo = 5 * HZ;
 
-	ndev->poll = &ql_poll;
-	ndev->weight = 64;
+	netif_napi_add(ndev, &qdev->napi, ql_poll, 64);
 
 	ndev->irq = pdev->irq;
 
diff --git a/drivers/net/qla3xxx.h b/drivers/net/qla3xxx.h
index 4a832c4..aa2216f 100755
--- a/drivers/net/qla3xxx.h
+++ b/drivers/net/qla3xxx.h
@@ -1175,6 +1175,8 @@ struct ql3_adapter {
 	struct pci_dev *pdev;
 	struct net_device *ndev;	/* Parent NET device */
 
+	struct napi_struct napi;
+
 	/* Hardware information */
 	u8 chip_rev_id;
 	u8 pci_slot;
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index c9333b9..406de0d 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -384,6 +384,7 @@ struct rtl8169_private {
 	void __iomem *mmio_addr;	/* memory map physical address */
 	struct pci_dev *pci_dev;	/* Index of PCI device */
 	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;	/* statistics of net device */
 	spinlock_t lock;		/* spin lock flag */
 	u32 msg_enable;
@@ -443,13 +444,13 @@ static void rtl_set_rx_mode(struct net_device *dev);
 static void rtl8169_tx_timeout(struct net_device *dev);
 static struct net_device_stats *rtl8169_get_stats(struct net_device *dev);
 static int rtl8169_rx_interrupt(struct net_device *, struct rtl8169_private *,
-				void __iomem *);
+				void __iomem *, u32 budget);
 static int rtl8169_change_mtu(struct net_device *dev, int new_mtu);
 static void rtl8169_down(struct net_device *dev);
 static void rtl8169_rx_clear(struct rtl8169_private *tp);
 
 #ifdef CONFIG_R8169_NAPI
-static int rtl8169_poll(struct net_device *dev, int *budget);
+static int rtl8169_poll(struct napi_struct *napi, int budget);
 #endif
 
 static const unsigned int rtl8169_rx_config =
@@ -1647,8 +1648,7 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->set_mac_address = rtl_set_mac_address;
 
 #ifdef CONFIG_R8169_NAPI
-	dev->poll = rtl8169_poll;
-	dev->weight = R8169_NAPI_WEIGHT;
+	netif_napi_add(dev, &tp->napi, rtl8169_poll, R8169_NAPI_WEIGHT);
 #endif
 
 #ifdef CONFIG_R8169_VLAN
@@ -2063,7 +2063,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	if (ret < 0)
 		goto out;
 
-	netif_poll_enable(dev);
+	napi_enable(&tp->napi);
 
 	rtl_hw_start(dev);
 
@@ -2255,11 +2255,11 @@ static void rtl8169_wait_for_quiescence(struct net_device *dev)
 	synchronize_irq(dev->irq);
 
 	/* Wait for any pending NAPI task to complete */
-	netif_poll_disable(dev);
+	napi_disable(&tp->napi);
 
 	rtl8169_irq_mask_and_ack(ioaddr);
 
-	netif_poll_enable(dev);
+	napi_enable(&tp->napi);
 }
 
 static void rtl8169_reinit_task(struct work_struct *work)
@@ -2303,7 +2303,7 @@ static void rtl8169_reset_task(struct work_struct *work)
 
 	rtl8169_wait_for_quiescence(dev);
 
-	rtl8169_rx_interrupt(dev, tp, tp->mmio_addr);
+	rtl8169_rx_interrupt(dev, tp, tp->mmio_addr, ~(u32)0);
 	rtl8169_tx_clear(tp);
 
 	if (tp->dirty_rx == tp->cur_rx) {
@@ -2608,14 +2608,14 @@ out:
 
 static int rtl8169_rx_interrupt(struct net_device *dev,
 				struct rtl8169_private *tp,
-				void __iomem *ioaddr)
+				void __iomem *ioaddr, u32 budget)
 {
 	unsigned int cur_rx, rx_left;
 	unsigned int delta, count;
 
 	cur_rx = tp->cur_rx;
 	rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
-	rx_left = rtl8169_rx_quota(rx_left, (u32) dev->quota);
+	rx_left = rtl8169_rx_quota(rx_left, budget);
 
 	for (; rx_left > 0; rx_left--, cur_rx++) {
 		unsigned int entry = cur_rx % NUM_RX_DESC;
@@ -2763,8 +2763,8 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 		RTL_W16(IntrMask, tp->intr_event & ~tp->napi_event);
 		tp->intr_mask = ~tp->napi_event;
 
-		if (likely(netif_rx_schedule_prep(dev)))
-			__netif_rx_schedule(dev);
+		if (likely(netif_rx_schedule_prep(dev, &tp->napi)))
+			__netif_rx_schedule(dev, &tp->napi);
 		else if (netif_msg_intr(tp)) {
 			printk(KERN_INFO "%s: interrupt %04x taken in poll\n",
 			       dev->name, status);
@@ -2773,7 +2773,7 @@ static irqreturn_t rtl8169_interrupt(int irq, void *dev_instance)
 #else
 		/* Rx interrupt */
 		if (status & (RxOK | RxOverflow | RxFIFOOver))
-			rtl8169_rx_interrupt(dev, tp, ioaddr);
+			rtl8169_rx_interrupt(dev, tp, ioaddr, ~(u32)0);
 
 		/* Tx interrupt */
 		if (status & (TxOK | TxErr))
@@ -2796,20 +2796,18 @@ out:
 }
 
 #ifdef CONFIG_R8169_NAPI
-static int rtl8169_poll(struct net_device *dev, int *budget)
+static int rtl8169_poll(struct napi_struct *napi, int budget)
 {
-	unsigned int work_done, work_to_do = min(*budget, dev->quota);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = container_of(napi, struct rtl8169_private, napi);
+	struct net_device *dev = tp->dev;
 	void __iomem *ioaddr = tp->mmio_addr;
+	int work_done;
 
-	work_done = rtl8169_rx_interrupt(dev, tp, ioaddr);
+	work_done = rtl8169_rx_interrupt(dev, tp, ioaddr, (u32) budget);
 	rtl8169_tx_interrupt(dev, tp, ioaddr);
 
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (work_done < work_to_do) {
-		netif_rx_complete(dev);
+	if (work_done < budget) {
+		netif_rx_complete(dev, napi);
 		tp->intr_mask = 0xffff;
 		/*
 		 * 20040426: the barrier is not strictly required but the
@@ -2821,7 +2819,7 @@ static int rtl8169_poll(struct net_device *dev, int *budget)
 		RTL_W16(IntrMask, tp->intr_event);
 	}
 
-	return (work_done >= work_to_do);
+	return work_done;
 }
 #endif
 
@@ -2850,7 +2848,7 @@ core_down:
 	synchronize_irq(dev->irq);
 
 	if (!poll_locked) {
-		netif_poll_disable(dev);
+		napi_disable(&tp->napi);
 		poll_locked++;
 	}
 
@@ -2888,7 +2886,7 @@ static int rtl8169_close(struct net_device *dev)
 
 	free_irq(dev->irq, dev);
 
-	netif_poll_enable(dev);
+	napi_disable(&tp->napi);
 
 	pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray,
 			    tp->RxPhyAddr);
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 24feb00..6686e88 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2568,7 +2568,7 @@ static void free_rx_buffers(struct s2io_nic *sp)
 
 /**
  * s2io_poll - Rx interrupt handler for NAPI support
- * @dev : pointer to the device structure.
+ * @napi : pointer to the napi structure.
  * @budget : The number of packets that were budgeted to be processed
  * during  one pass through the 'Poll" function.
  * Description:
@@ -2579,9 +2579,10 @@ static void free_rx_buffers(struct s2io_nic *sp)
  * 0 on success and 1 if there are No Rx packets to be processed.
  */
 
-static int s2io_poll(struct net_device *dev, int *budget)
+static int s2io_poll(struct napi_struct *napi, int budget)
 {
-	struct s2io_nic *nic = dev->priv;
+	struct s2io_nic *nic = container_of(napi, struct s2io_nic, napi);
+	struct net_device *dev = nic->dev;
 	int pkt_cnt = 0, org_pkts_to_process;
 	struct mac_info *mac_control;
 	struct config_param *config;
@@ -2592,9 +2593,7 @@ static int s2io_poll(struct net_device *dev, int *budget)
 	mac_control = &nic->mac_control;
 	config = &nic->config;
 
-	nic->pkts_to_process = *budget;
-	if (nic->pkts_to_process > dev->quota)
-		nic->pkts_to_process = dev->quota;
+	nic->pkts_to_process = budget;
 	org_pkts_to_process = nic->pkts_to_process;
 
 	writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_int);
@@ -2608,12 +2607,8 @@ static int s2io_poll(struct net_device *dev, int *budget)
 			goto no_rx;
 		}
 	}
-	if (!pkt_cnt)
-		pkt_cnt = 1;
 
-	dev->quota -= pkt_cnt;
-	*budget -= pkt_cnt;
-	netif_rx_complete(dev);
+	netif_rx_complete(dev, napi);
 
 	for (i = 0; i < config->rx_ring_num; i++) {
 		if (fill_rx_buffers(nic, i) == -ENOMEM) {
@@ -2626,12 +2621,9 @@ static int s2io_poll(struct net_device *dev, int *budget)
 	writeq(0x0, &bar0->rx_traffic_mask);
 	readl(&bar0->rx_traffic_mask);
 	atomic_dec(&nic->isr_cnt);
-	return 0;
+	return pkt_cnt;
 
 no_rx:
-	dev->quota -= pkt_cnt;
-	*budget -= pkt_cnt;
-
 	for (i = 0; i < config->rx_ring_num; i++) {
 		if (fill_rx_buffers(nic, i) == -ENOMEM) {
 			DBG_PRINT(INFO_DBG, "%s:Out of memory", dev->name);
@@ -2640,7 +2632,7 @@ no_rx:
 		}
 	}
 	atomic_dec(&nic->isr_cnt);
-	return 1;
+	return pkt_cnt;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -3861,6 +3853,7 @@ static int s2io_close(struct net_device *dev)
 	struct s2io_nic *sp = dev->priv;
 
 	netif_stop_queue(dev);
+	napi_disable(&sp->napi);
 	/* Reset card, kill tasklet and free Tx and Rx buffers. */
 	s2io_card_down(sp);
 
@@ -4232,8 +4225,8 @@ static irqreturn_t s2io_isr(int irq, void *dev_id)
 
 	if (napi) {
 		if (reason & GEN_INTR_RXTRAFFIC) {
-			if ( likely ( netif_rx_schedule_prep(dev)) ) {
-				__netif_rx_schedule(dev);
+			if (likely (netif_rx_schedule_prep(dev, &sp->napi))) {
+				__netif_rx_schedule(dev, &sp->napi);
 				writeq(S2IO_MINUS_ONE, &bar0->rx_traffic_mask);
 			}
 			else
@@ -7215,8 +7208,7 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 	 * will use eth_mac_addr() for  dev->set_mac_address
 	 * mac address will be set every time dev->open() is called
 	 */
-	dev->poll = s2io_poll;
-	dev->weight = 32;
+	netif_napi_add(dev, &sp->napi, s2io_poll, 32);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = s2io_netpoll;
diff --git a/drivers/net/s2io.h b/drivers/net/s2io.h
index 92983ee..420fefb 100644
--- a/drivers/net/s2io.h
+++ b/drivers/net/s2io.h
@@ -786,6 +786,7 @@ struct s2io_nic {
 	 */
 	int pkts_to_process;
 	struct net_device *dev;
+	struct napi_struct napi;
 	struct mac_info mac_control;
 	struct config_param config;
 	struct pci_dev *pdev;
@@ -1019,7 +1020,7 @@ static void s2io_set_multicast(struct net_device *dev);
 static int rx_osm_handler(struct ring_info *ring_data, struct RxD_t * rxdp);
 static void s2io_link(struct s2io_nic * sp, int link);
 static void s2io_reset(struct s2io_nic * sp);
-static int s2io_poll(struct net_device *dev, int *budget);
+static int s2io_poll(struct napi_struct *napi, int budget);
 static void s2io_init_pci(struct s2io_nic * sp);
 static int s2io_set_mac_addr(struct net_device *dev, u8 * addr);
 static void s2io_alarm_handle(unsigned long data);
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index e7fdcf1..8386ba8 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -238,6 +238,7 @@ struct sbmac_softc {
 	 */
 
 	struct net_device *sbm_dev;		/* pointer to linux device */
+	struct napi_struct napi;
 	spinlock_t sbm_lock;		/* spin lock */
 	struct timer_list sbm_timer;     	/* for monitoring MII */
 	struct net_device_stats sbm_stats;
@@ -320,7 +321,7 @@ static struct net_device_stats *sbmac_get_stats(struct net_device *dev);
 static void sbmac_set_rx_mode(struct net_device *dev);
 static int sbmac_mii_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static int sbmac_close(struct net_device *dev);
-static int sbmac_poll(struct net_device *poll_dev, int *budget);
+static int sbmac_poll(struct napi_struct *napi, int budget);
 
 static int sbmac_mii_poll(struct sbmac_softc *s,int noisy);
 static int sbmac_mii_probe(struct net_device *dev);
@@ -2154,20 +2155,13 @@ static irqreturn_t sbmac_intr(int irq,void *dev_instance)
 	 * Transmits on channel 0
 	 */
 
-	if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0)) {
+	if (isr & (M_MAC_INT_CHANNEL << S_MAC_TX_CH0))
 		sbdma_tx_process(sc,&(sc->sbm_txdma), 0);
-#ifdef CONFIG_NETPOLL_TRAP
-		if (netpoll_trap()) {
-			if (test_and_clear_bit(__LINK_STATE_XOFF, &dev->state))
-				__netif_schedule(dev);
-		}
-#endif
-	}
 
 	if (isr & (M_MAC_INT_CHANNEL << S_MAC_RX_CH0)) {
-		if (netif_rx_schedule_prep(dev)) {
+		if (netif_rx_schedule_prep(dev, &sc->napi)) {
 			__raw_writeq(0, sc->sbm_imr);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &sc->napi);
 			/* Depend on the exit from poll to reenable intr */
 		}
 		else {
@@ -2470,8 +2464,8 @@ static int sbmac_init(struct net_device *dev, int idx)
 	dev->do_ioctl           = sbmac_mii_ioctl;
 	dev->tx_timeout         = sbmac_tx_timeout;
 	dev->watchdog_timeo     = TX_TIMEOUT;
-	dev->poll               = sbmac_poll;
-	dev->weight             = 16;
+
+	netif_napi_add(dev, &sc->napi, sbmac_poll, 16);
 
 	dev->change_mtu         = sb1250_change_mtu;
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2850,6 +2844,8 @@ static int sbmac_close(struct net_device *dev)
 	unsigned long flags;
 	int irq;
 
+	napi_disable(&sc->napi);
+
 	sbmac_set_channel_state(sc,sbmac_state_off);
 
 	del_timer_sync(&sc->sbm_timer);
@@ -2874,26 +2870,17 @@ static int sbmac_close(struct net_device *dev)
 	return 0;
 }
 
-static int sbmac_poll(struct net_device *dev, int *budget)
+static int sbmac_poll(struct napi_struct *napi, int budget)
 {
-	int work_to_do;
+	struct sbmac_softc *sc = container_of(napi, struct sbmac_softc, napi);
+	struct net_device *dev = sc->sbm_dev;
 	int work_done;
-	struct sbmac_softc *sc = netdev_priv(dev);
-
-	work_to_do = min(*budget, dev->quota);
-	work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), work_to_do, 1);
-
-	if (work_done > work_to_do)
-		printk(KERN_ERR "%s exceeded work_to_do budget=%d quota=%d work-done=%d\n",
-		       sc->sbm_dev->name, *budget, dev->quota, work_done);
 
+	work_done = sbdma_rx_process(sc, &(sc->sbm_rxdma), budget, 1);
 	sbdma_tx_process(sc, &(sc->sbm_txdma), 1);
 
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (work_done < work_to_do) {
-		netif_rx_complete(dev);
+	if (work_done < budget) {
+		netif_rx_complete(dev, napi);
 
 #ifdef CONFIG_SBMAC_COALESCE
 		__raw_writeq(((M_MAC_INT_EOP_COUNT | M_MAC_INT_EOP_TIMER) << S_MAC_TX_CH0) |
@@ -2905,7 +2892,7 @@ static int sbmac_poll(struct net_device *dev, int *budget)
 #endif
 	}
 
-	return (work_done >= work_to_do);
+	return work_done;
 }
 
 #if defined(SBMAC_ETH0_HWADDR) || defined(SBMAC_ETH1_HWADDR) || defined(SBMAC_ETH2_HWADDR) || defined(SBMAC_ETH3_HWADDR)
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index e3d8520..0bf46ed 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2528,7 +2528,7 @@ static int skge_up(struct net_device *dev)
 	skge_write32(hw, B0_IMSK, hw->intr_mask);
 	spin_unlock_irq(&hw->hw_lock);
 
-	netif_poll_enable(dev);
+	napi_enable(&skge->napi);
 	return 0;
 
  free_rx_ring:
@@ -2558,7 +2558,7 @@ static int skge_down(struct net_device *dev)
 	if (hw->chip_id == CHIP_ID_GENESIS && hw->phy_type == SK_PHY_XMAC)
 		del_timer_sync(&skge->link_timer);
 
-	netif_poll_disable(dev);
+	napi_disable(&skge->napi);
 	netif_carrier_off(dev);
 
 	spin_lock_irq(&hw->hw_lock);
@@ -3044,14 +3044,13 @@ static void skge_tx_done(struct net_device *dev)
 	}
 }
 
-static int skge_poll(struct net_device *dev, int *budget)
+static int skge_poll(struct napi_struct *napi, int to_do)
 {
-	struct skge_port *skge = netdev_priv(dev);
+	struct skge_port *skge = container_of(napi, struct skge_port, napi);
+	struct net_device *dev = skge->netdev;
 	struct skge_hw *hw = skge->hw;
 	struct skge_ring *ring = &skge->rx_ring;
 	struct skge_element *e;
-	unsigned long flags;
-	int to_do = min(dev->quota, *budget);
 	int work_done = 0;
 
 	skge_tx_done(dev);
@@ -3082,20 +3081,16 @@ static int skge_poll(struct net_device *dev, int *budget)
 	wmb();
 	skge_write8(hw, Q_ADDR(rxqaddr[skge->port], Q_CSR), CSR_START);
 
-	*budget -= work_done;
-	dev->quota -= work_done;
-
-	if (work_done >=  to_do)
-		return 1; /* not done */
-
-	spin_lock_irqsave(&hw->hw_lock, flags);
-	__netif_rx_complete(dev);
-	hw->intr_mask |= napimask[skge->port];
-  	skge_write32(hw, B0_IMSK, hw->intr_mask);
-	skge_read32(hw, B0_IMSK);
-	spin_unlock_irqrestore(&hw->hw_lock, flags);
+	if (work_done < to_do) {
+		spin_lock_irq(&hw->hw_lock);
+		__netif_rx_complete(dev, napi);
+		hw->intr_mask |= napimask[skge->port];
+		skge_write32(hw, B0_IMSK, hw->intr_mask);
+		skge_read32(hw, B0_IMSK);
+		spin_unlock_irq(&hw->hw_lock);
+	}
 
-	return 0;
+	return work_done;
 }
 
 /* Parity errors seem to happen when Genesis is connected to a switch
@@ -3252,8 +3247,9 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 	}
 
 	if (status & (IS_XA1_F|IS_R1_F)) {
+		struct skge_port *skge = netdev_priv(hw->dev[0]);
 		hw->intr_mask &= ~(IS_XA1_F|IS_R1_F);
-		netif_rx_schedule(hw->dev[0]);
+		netif_rx_schedule(hw->dev[0], &skge->napi);
 	}
 
 	if (status & IS_PA_TO_TX1)
@@ -3271,13 +3267,14 @@ static irqreturn_t skge_intr(int irq, void *dev_id)
 		skge_mac_intr(hw, 0);
 
 	if (hw->dev[1]) {
+		struct skge_port *skge = netdev_priv(hw->dev[1]);
+
 		if (status & (IS_XA2_F|IS_R2_F)) {
 			hw->intr_mask &= ~(IS_XA2_F|IS_R2_F);
-			netif_rx_schedule(hw->dev[1]);
+			netif_rx_schedule(hw->dev[1], &skge->napi);
 		}
 
 		if (status & IS_PA_TO_RX2) {
-			struct skge_port *skge = netdev_priv(hw->dev[1]);
 			++skge->net_stats.rx_over_errors;
 			skge_write16(hw, B3_PA_CTRL, PA_CLR_TO_RX2);
 		}
@@ -3569,8 +3566,6 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 	SET_ETHTOOL_OPS(dev, &skge_ethtool_ops);
 	dev->tx_timeout = skge_tx_timeout;
 	dev->watchdog_timeo = TX_WATCHDOG;
-	dev->poll = skge_poll;
-	dev->weight = NAPI_WEIGHT;
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = skge_netpoll;
 #endif
@@ -3580,6 +3575,7 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 		dev->features |= NETIF_F_HIGHDMA;
 
 	skge = netdev_priv(dev);
+	netif_napi_add(dev, &skge->napi, skge_poll, NAPI_WEIGHT);
 	skge->netdev = dev;
 	skge->hw = hw;
 	skge->msg_enable = netif_msg_init(debug, default_msg);
diff --git a/drivers/net/skge.h b/drivers/net/skge.h
index edd7146..dd0fd45 100644
--- a/drivers/net/skge.h
+++ b/drivers/net/skge.h
@@ -2448,6 +2448,7 @@ enum pause_status {
 struct skge_port {
 	struct skge_hw	     *hw;
 	struct net_device    *netdev;
+	struct napi_struct   napi;
 	int		     port;
 	u32		     msg_enable;
 
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index e7a2ead..fb1bb63 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1078,7 +1078,7 @@ static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp
 	u16 port = sky2->port;
 
 	netif_tx_lock_bh(dev);
-	netif_poll_disable(sky2->hw->dev[0]);
+	napi_disable(&hw->napi);
 
 	sky2->vlgrp = grp;
 	if (grp) {
@@ -1093,7 +1093,7 @@ static void sky2_vlan_rx_register(struct net_device *dev, struct vlan_group *grp
 			     TX_VLAN_TAG_OFF);
 	}
 
-	netif_poll_enable(sky2->hw->dev[0]);
+	napi_enable(&hw->napi);
 	netif_tx_unlock_bh(dev);
 }
 #endif
@@ -1622,6 +1622,8 @@ static int sky2_down(struct net_device *dev)
 	/* Stop more packets from being queued */
 	netif_stop_queue(dev);
 
+	napi_disable(&hw->napi);
+
 	/* Disable port IRQ */
 	imask = sky2_read32(hw, B0_IMSK);
 	imask &= ~portirq_msk[port];
@@ -1956,7 +1958,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
 
 	dev->trans_start = jiffies;	/* prevent tx timeout */
 	netif_stop_queue(dev);
-	netif_poll_disable(hw->dev[0]);
+	napi_disable(&hw->napi);
 
 	synchronize_irq(hw->pdev->irq);
 
@@ -1988,7 +1990,7 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
 	else {
 		gma_write16(hw, port, GM_GP_CTRL, ctl);
 
-		netif_poll_enable(hw->dev[0]);
+		napi_enable(&hw->napi);
 		netif_wake_queue(dev);
 	}
 
@@ -2434,10 +2436,8 @@ static inline void sky2_idle_start(struct sky2_hw *hw)
 static void sky2_idle(unsigned long arg)
 {
 	struct sky2_hw *hw = (struct sky2_hw *) arg;
-	struct net_device *dev = hw->dev[0];
 
-	if (__netif_rx_schedule_prep(dev))
-		__netif_rx_schedule(dev);
+	napi_schedule(&hw->napi);
 
 	mod_timer(&hw->idle_timer, jiffies + msecs_to_jiffies(idle_timeout));
 }
@@ -2470,11 +2470,11 @@ static void sky2_err_intr(struct sky2_hw *hw, u32 status)
 		sky2_le_error(hw, 1, Q_XA2, TX_RING_SIZE);
 }
 
-static int sky2_poll(struct net_device *dev0, int *budget)
+static int sky2_poll(struct napi_struct *napi, int work_limit)
 {
-	struct sky2_hw *hw = ((struct sky2_port *) netdev_priv(dev0))->hw;
-	int work_done;
+	struct sky2_hw *hw = container_of(napi, struct sky2_hw, napi);
 	u32 status = sky2_read32(hw, B0_Y2_SP_EISR);
+	int work_done;
 
 	if (unlikely(status & Y2_IS_ERROR))
 		sky2_err_intr(hw, status);
@@ -2485,31 +2485,27 @@ static int sky2_poll(struct net_device *dev0, int *budget)
 	if (status & Y2_IS_IRQ_PHY2)
 		sky2_phy_intr(hw, 1);
 
-	work_done = sky2_status_intr(hw, min(dev0->quota, *budget));
-	*budget -= work_done;
-	dev0->quota -= work_done;
+	work_done = sky2_status_intr(hw, work_limit);
 
 	/* More work? */
- 	if (hw->st_idx != sky2_read16(hw, STAT_PUT_IDX))
-		return 1;
+ 	if (hw->st_idx == sky2_read16(hw, STAT_PUT_IDX)) {
+		/* Bug/Errata workaround?
+		 * Need to kick the TX irq moderation timer.
+		 */
+		if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) {
+			sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP);
+			sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START);
+		}
 
-	/* Bug/Errata workaround?
-	 * Need to kick the TX irq moderation timer.
-	 */
-	if (sky2_read8(hw, STAT_TX_TIMER_CTRL) == TIM_START) {
-		sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_STOP);
-		sky2_write8(hw, STAT_TX_TIMER_CTRL, TIM_START);
+		napi_complete(napi);
+		sky2_read32(hw, B0_Y2_SP_LISR);
 	}
-	netif_rx_complete(dev0);
-
-	sky2_read32(hw, B0_Y2_SP_LISR);
-	return 0;
+	return work_done;
 }
 
 static irqreturn_t sky2_intr(int irq, void *dev_id)
 {
 	struct sky2_hw *hw = dev_id;
-	struct net_device *dev0 = hw->dev[0];
 	u32 status;
 
 	/* Reading this mask interrupts as side effect */
@@ -2518,8 +2514,8 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)
 		return IRQ_NONE;
 
 	prefetch(&hw->st_le[hw->st_idx]);
-	if (likely(__netif_rx_schedule_prep(dev0)))
-		__netif_rx_schedule(dev0);
+	
+	napi_schedule(&hw->napi);
 
 	return IRQ_HANDLED;
 }
@@ -2528,10 +2524,8 @@ static irqreturn_t sky2_intr(int irq, void *dev_id)
 static void sky2_netpoll(struct net_device *dev)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
-	struct net_device *dev0 = sky2->hw->dev[0];
 
-	if (netif_running(dev) && __netif_rx_schedule_prep(dev0))
-		__netif_rx_schedule(dev0);
+	napi_schedule(&sky2->hw->napi);
 }
 #endif
 
@@ -2733,7 +2727,7 @@ static void sky2_restart(struct work_struct *work)
 	sky2_write32(hw, B0_IMSK, 0);
 	sky2_read32(hw, B0_IMSK);
 
-	netif_poll_disable(hw->dev[0]);
+	napi_disable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		dev = hw->dev[i];
@@ -2743,7 +2737,7 @@ static void sky2_restart(struct work_struct *work)
 
 	sky2_reset(hw);
 	sky2_write32(hw, B0_IMSK, Y2_IS_BASE);
-	netif_poll_enable(hw->dev[0]);
+	napi_enable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		dev = hw->dev[i];
@@ -3571,7 +3565,7 @@ static int sky2_debug_show(struct seq_file *seq, void *v)
 		   sky2_read32(hw, B0_IMSK),
 		   sky2_read32(hw, B0_Y2_SP_ICR));
 
-	netif_poll_disable(hw->dev[0]);
+	napi_disable(&hw->napi);
 	last = sky2_read16(hw, STAT_PUT_IDX);
 
 	if (hw->st_idx == last)
@@ -3641,7 +3635,7 @@ static int sky2_debug_show(struct seq_file *seq, void *v)
 		   last = sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_PUT_IDX)),
 		   sky2_read16(hw, Y2_QADDR(rxqaddr[port], PREF_UNIT_LAST_IDX)));
 
-	netif_poll_enable(hw->dev[0]);
+	napi_enable(&hw->napi);
 	return 0;
 }
 
@@ -3766,15 +3760,8 @@ static __devinit struct net_device *sky2_init_netdev(struct sky2_hw *hw,
 	SET_ETHTOOL_OPS(dev, &sky2_ethtool_ops);
 	dev->tx_timeout = sky2_tx_timeout;
 	dev->watchdog_timeo = TX_WATCHDOG;
-	if (port == 0)
-		dev->poll = sky2_poll;
-	dev->weight = NAPI_WEIGHT;
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	/* Network console (only works on port 0)
-	 * because netpoll makes assumptions about NAPI
-	 */
-	if (port == 0)
-		dev->poll_controller = sky2_netpoll;
+	dev->poll_controller = sky2_netpoll;
 #endif
 
 	sky2 = netdev_priv(dev);
@@ -3985,6 +3972,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev,
 		err = -ENOMEM;
 		goto err_out_free_pci;
 	}
+	netif_napi_add(dev, &hw->napi, sky2_poll, NAPI_WEIGHT);
 
 	if (!disable_msi && pci_enable_msi(pdev) == 0) {
 		err = sky2_test_msi(hw);
@@ -4109,7 +4097,7 @@ static int sky2_suspend(struct pci_dev *pdev, pm_message_t state)
 		return 0;
 
 	del_timer_sync(&hw->idle_timer);
-	netif_poll_disable(hw->dev[0]);
+	napi_disable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		struct net_device *dev = hw->dev[i];
@@ -4173,7 +4161,7 @@ static int sky2_resume(struct pci_dev *pdev)
 		}
 	}
 
-	netif_poll_enable(hw->dev[0]);
+	napi_enable(&hw->napi);
 	sky2_idle_start(hw);
 	return 0;
 out:
@@ -4192,7 +4180,7 @@ static void sky2_shutdown(struct pci_dev *pdev)
 		return;
 
 	del_timer_sync(&hw->idle_timer);
-	netif_poll_disable(hw->dev[0]);
+	napi_disable(&hw->napi);
 
 	for (i = 0; i < hw->ports; i++) {
 		struct net_device *dev = hw->dev[i];
diff --git a/drivers/net/sky2.h b/drivers/net/sky2.h
index dce4d27..8d6a260 100644
--- a/drivers/net/sky2.h
+++ b/drivers/net/sky2.h
@@ -2039,6 +2039,7 @@ struct sky2_port {
 struct sky2_hw {
 	void __iomem  	     *regs;
 	struct pci_dev	     *pdev;
+	struct napi_struct   napi;
 	struct net_device    *dev[2];
 
 	u8	     	     chip_id;
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 590b12c..a144327 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -1278,34 +1278,26 @@ bad_desc:
  * (using netif_receive_skb). If all/enough packets are up, the driver
  * reenables interrupts and returns 0. If not, 1 is returned.
  */
-static int
-spider_net_poll(struct net_device *netdev, int *budget)
+static int spider_net_poll(struct napi_struct *napi, int budget)
 {
-	struct spider_net_card *card = netdev_priv(netdev);
-	int packets_to_do, packets_done = 0;
-	int no_more_packets = 0;
-
-	packets_to_do = min(*budget, netdev->quota);
-
-	while (packets_to_do) {
-		if (spider_net_decode_one_descr(card)) {
-			packets_done++;
-			packets_to_do--;
-		} else {
-			/* no more packets for the stack */
-			no_more_packets = 1;
+	struct spider_net_card *card = container_of(napi, struct spider_net_card, napi);
+	struct net_device *netdev = card->netdev;
+	int packets_done = 0;
+
+	while (packets_done < budget) {
+		if (!spider_net_decode_one_descr(card))
 			break;
-		}
+
+		packets_done++;
 	}
 
 	if ((packets_done == 0) && (card->num_rx_ints != 0)) {
-		no_more_packets = spider_net_resync_tail_ptr(card);
+		if (!spider_net_resync_tail_ptr(card))
+			packets_done = budget;
 		spider_net_resync_head_ptr(card);
 	}
 	card->num_rx_ints = 0;
 
-	netdev->quota -= packets_done;
-	*budget -= packets_done;
 	spider_net_refill_rx_chain(card);
 	spider_net_enable_rxdmac(card);
 
@@ -1313,14 +1305,13 @@ spider_net_poll(struct net_device *netdev, int *budget)
 
 	/* if all packets are in the stack, enable interrupts and return 0 */
 	/* if not, return 1 */
-	if (no_more_packets) {
-		netif_rx_complete(netdev);
+	if (packets_done < budget) {
+		netif_rx_complete(netdev, napi);
 		spider_net_rx_irq_on(card);
 		card->ignore_rx_ramfull = 0;
-		return 0;
 	}
 
-	return 1;
+	return packets_done;
 }
 
 /**
@@ -1563,7 +1554,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
 			spider_net_refill_rx_chain(card);
 			spider_net_enable_rxdmac(card);
 			card->num_rx_ints ++;
-			netif_rx_schedule(card->netdev);
+			netif_rx_schedule(card->netdev,
+					  &card->napi);
 		}
 		show_error = 0;
 		break;
@@ -1583,7 +1575,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(card->netdev);
+		netif_rx_schedule(card->netdev,
+				  &card->napi);
 		show_error = 0;
 		break;
 
@@ -1597,7 +1590,8 @@ spider_net_handle_error_irq(struct spider_net_card *card, u32 status_reg)
 		spider_net_refill_rx_chain(card);
 		spider_net_enable_rxdmac(card);
 		card->num_rx_ints ++;
-		netif_rx_schedule(card->netdev);
+		netif_rx_schedule(card->netdev,
+				  &card->napi);
 		show_error = 0;
 		break;
 
@@ -1690,11 +1684,11 @@ spider_net_interrupt(int irq, void *ptr)
 
 	if (status_reg & SPIDER_NET_RXINT ) {
 		spider_net_rx_irq_off(card);
-		netif_rx_schedule(netdev);
+		netif_rx_schedule(netdev, &card->napi);
 		card->num_rx_ints ++;
 	}
 	if (status_reg & SPIDER_NET_TXINT)
-		netif_rx_schedule(netdev);
+		netif_rx_schedule(netdev, &card->napi);
 
 	if (status_reg & SPIDER_NET_LINKINT)
 		spider_net_link_reset(netdev);
@@ -2038,7 +2032,7 @@ spider_net_open(struct net_device *netdev)
 
 	netif_start_queue(netdev);
 	netif_carrier_on(netdev);
-	netif_poll_enable(netdev);
+	napi_enable(&card->napi);
 
 	spider_net_enable_interrupts(card);
 
@@ -2208,7 +2202,7 @@ spider_net_stop(struct net_device *netdev)
 {
 	struct spider_net_card *card = netdev_priv(netdev);
 
-	netif_poll_disable(netdev);
+	napi_disable(&card->napi);
 	netif_carrier_off(netdev);
 	netif_stop_queue(netdev);
 	del_timer_sync(&card->tx_timer);
@@ -2308,9 +2302,6 @@ spider_net_setup_netdev_ops(struct net_device *netdev)
 	/* tx watchdog */
 	netdev->tx_timeout = &spider_net_tx_timeout;
 	netdev->watchdog_timeo = SPIDER_NET_WATCHDOG_TIMEOUT;
-	/* NAPI */
-	netdev->poll = &spider_net_poll;
-	netdev->weight = SPIDER_NET_NAPI_WEIGHT;
 	/* HW VLAN */
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	/* poll controller */
@@ -2355,6 +2346,9 @@ spider_net_setup_netdev(struct spider_net_card *card)
 
 	card->options.rx_csum = SPIDER_NET_RX_CSUM_DEFAULT;
 
+	netif_napi_add(netdev, &card->napi,
+		       spider_net_poll, SPIDER_NET_NAPI_WEIGHT);
+
 	spider_net_setup_netdev_ops(netdev);
 
 	netdev->features = NETIF_F_IP_CSUM | NETIF_F_LLTX;
diff --git a/drivers/net/spider_net.h b/drivers/net/spider_net.h
index dbbdb8c..a2fcdeb 100644
--- a/drivers/net/spider_net.h
+++ b/drivers/net/spider_net.h
@@ -466,6 +466,8 @@ struct spider_net_card {
 	struct pci_dev *pdev;
 	struct mii_phy phy;
 
+	struct napi_struct napi;
+
 	int medium;
 
 	void __iomem *regs;
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 8b64786..2f172a2 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -178,16 +178,13 @@ static int full_duplex[MAX_UNITS] = {0, };
 #define skb_num_frags(skb) (skb_shinfo(skb)->nr_frags + 1)
 
 #ifdef HAVE_NETDEV_POLL
-#define init_poll(dev) \
-do { \
-	dev->poll = &netdev_poll; \
-	dev->weight = max_interrupt_work; \
-} while (0)
-#define netdev_rx(dev, ioaddr) \
+#define init_poll(dev, np) \
+	netif_napi_add(dev, &np->napi, netdev_poll, max_interrupt_work)
+#define netdev_rx(dev, np, ioaddr) \
 do { \
 	u32 intr_enable; \
-	if (netif_rx_schedule_prep(dev)) { \
-		__netif_rx_schedule(dev); \
+	if (netif_rx_schedule_prep(dev, &np->napi)) { \
+		__netif_rx_schedule(dev, &np->napi); \
 		intr_enable = readl(ioaddr + IntrEnable); \
 		intr_enable &= ~(IntrRxDone | IntrRxEmpty); \
 		writel(intr_enable, ioaddr + IntrEnable); \
@@ -204,12 +201,12 @@ do { \
 } while (0)
 #define netdev_receive_skb(skb) netif_receive_skb(skb)
 #define vlan_netdev_receive_skb(skb, vlgrp, vlid) vlan_hwaccel_receive_skb(skb, vlgrp, vlid)
-static int	netdev_poll(struct net_device *dev, int *budget);
+static int	netdev_poll(struct napi_struct *napi, int budget);
 #else  /* not HAVE_NETDEV_POLL */
-#define init_poll(dev)
+#define init_poll(dev, np)
 #define netdev_receive_skb(skb) netif_rx(skb)
 #define vlan_netdev_receive_skb(skb, vlgrp, vlid) vlan_hwaccel_rx(skb, vlgrp, vlid)
-#define netdev_rx(dev, ioaddr) \
+#define netdev_rx(dev, np, ioaddr) \
 do { \
 	int quota = np->dirty_rx + RX_RING_SIZE - np->cur_rx; \
 	__netdev_rx(dev, &quota);\
@@ -599,6 +596,8 @@ struct netdev_private {
 	struct tx_done_desc *tx_done_q;
 	dma_addr_t tx_done_q_dma;
 	unsigned int tx_done;
+	struct napi_struct napi;
+	struct net_device *dev;
 	struct net_device_stats stats;
 	struct pci_dev *pci_dev;
 #ifdef VLAN_SUPPORT
@@ -791,6 +790,7 @@ static int __devinit starfire_init_one(struct pci_dev *pdev,
 	dev->irq = irq;
 
 	np = netdev_priv(dev);
+	np->dev = dev;
 	np->base = base;
 	spin_lock_init(&np->lock);
 	pci_set_drvdata(pdev, dev);
@@ -851,7 +851,7 @@ static int __devinit starfire_init_one(struct pci_dev *pdev,
 	dev->hard_start_xmit = &start_tx;
 	dev->tx_timeout = tx_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
-	init_poll(dev);
+	init_poll(dev, np);
 	dev->stop = &netdev_close;
 	dev->get_stats = &get_stats;
 	dev->set_multicast_list = &set_rx_mode;
@@ -1330,7 +1330,7 @@ static irqreturn_t intr_handler(int irq, void *dev_instance)
 		handled = 1;
 
 		if (intr_status & (IntrRxDone | IntrRxEmpty))
-			netdev_rx(dev, ioaddr);
+			netdev_rx(dev, np, ioaddr);
 
 		/* Scavenge the skbuff list based on the Tx-done queue.
 		   There are redundant checks here that may be cleaned up
@@ -1531,36 +1531,35 @@ static int __netdev_rx(struct net_device *dev, int *quota)
 
 
 #ifdef HAVE_NETDEV_POLL
-static int netdev_poll(struct net_device *dev, int *budget)
+static int netdev_poll(struct napi_struct *napi, int budget)
 {
+	struct netdev_private *np = container_of(napi, struct netdev_private, napi);
+	struct net_device *dev = np->dev;
 	u32 intr_status;
-	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->base;
-	int retcode = 0, quota = dev->quota;
+	int quota = budget;
 
 	do {
 		writel(IntrRxDone | IntrRxEmpty, ioaddr + IntrClear);
 
-		retcode = __netdev_rx(dev, &quota);
-		*budget -= (dev->quota - quota);
-		dev->quota = quota;
-		if (retcode)
+		if (__netdev_rx(dev, &quota))
 			goto out;
 
 		intr_status = readl(ioaddr + IntrStatus);
 	} while (intr_status & (IntrRxDone | IntrRxEmpty));
 
-	netif_rx_complete(dev);
+	netif_rx_complete(dev, napi);
 	intr_status = readl(ioaddr + IntrEnable);
 	intr_status |= IntrRxDone | IntrRxEmpty;
 	writel(intr_status, ioaddr + IntrEnable);
 
  out:
 	if (debug > 5)
-		printk(KERN_DEBUG "  exiting netdev_poll(): %d.\n", retcode);
+		printk(KERN_DEBUG "  exiting netdev_poll(): %d.\n",
+		       budget - quota);
 
 	/* Restart Rx engine if stopped. */
-	return retcode;
+	return budget - quota;
 }
 #endif /* HAVE_NETDEV_POLL */
 
@@ -1904,6 +1903,7 @@ static int netdev_close(struct net_device *dev)
 	int i;
 
 	netif_stop_queue(dev);
+	napi_disable(&np->napi);
 
 	if (debug > 1) {
 		printk(KERN_DEBUG "%s: Shutting down ethercard, Intr status %#8.8x.\n",
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 4328038..ed6959e 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -19,7 +19,7 @@
  *
  *    gem_change_mtu() and gem_set_multicast() are called with a read_lock()
  *    help by net/core/dev.c, thus they can't schedule. That means they can't
- *    call netif_poll_disable() neither, thus force gem_poll() to keep a spinlock
+ *    call napi_disable() either, thus force gem_poll() to keep a spinlock
  *    where it could have been dropped. change_mtu especially would love also to
  *    be able to msleep instead of horrid locked delays when resetting the HW,
  *    but that read_lock() makes it impossible, unless I defer it's action to
@@ -878,19 +878,20 @@ static int gem_rx(struct gem *gp, int work_to_do)
 	return work_done;
 }
 
-static int gem_poll(struct net_device *dev, int *budget)
+static int gem_poll(struct napi_struct *napi, int budget)
 {
-	struct gem *gp = dev->priv;
+	struct gem *gp = container_of(napi, struct gem, napi);
+	struct net_device *dev = gp->dev;
 	unsigned long flags;
+	int work_done;
 
 	/*
 	 * NAPI locking nightmare: See comment at head of driver
 	 */
 	spin_lock_irqsave(&gp->lock, flags);
 
+	work_done = 0;
 	do {
-		int work_to_do, work_done;
-
 		/* Handle anomalies */
 		if (gp->status & GREG_STAT_ABNORMAL) {
 			if (gem_abnormal_irq(dev, gp, gp->status))
@@ -906,29 +907,25 @@ static int gem_poll(struct net_device *dev, int *budget)
 
 		/* Run RX thread. We don't use any locking here,
 		 * code willing to do bad things - like cleaning the
-		 * rx ring - must call netif_poll_disable(), which
+		 * rx ring - must call napi_disable(), which
 		 * schedule_timeout()'s if polling is already disabled.
 		 */
-		work_to_do = min(*budget, dev->quota);
-
-		work_done = gem_rx(gp, work_to_do);
-
-		*budget -= work_done;
-		dev->quota -= work_done;
+		work_done += gem_rx(gp, budget);
 
-		if (work_done >= work_to_do)
-			return 1;
+		if (work_done >= budget)
+			return work_done;
 
 		spin_lock_irqsave(&gp->lock, flags);
 
 		gp->status = readl(gp->regs + GREG_STAT);
 	} while (gp->status & GREG_STAT_NAPI);
 
-	__netif_rx_complete(dev);
+	__netif_rx_complete(dev, napi);
 	gem_enable_ints(gp);
 
 	spin_unlock_irqrestore(&gp->lock, flags);
-	return 0;
+
+	return work_done;
 }
 
 static irqreturn_t gem_interrupt(int irq, void *dev_id)
@@ -946,17 +943,17 @@ static irqreturn_t gem_interrupt(int irq, void *dev_id)
 
 	spin_lock_irqsave(&gp->lock, flags);
 
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &gp->napi)) {
 		u32 gem_status = readl(gp->regs + GREG_STAT);
 
 		if (gem_status == 0) {
-			netif_poll_enable(dev);
+			napi_enable(&gp->napi);
 			spin_unlock_irqrestore(&gp->lock, flags);
 			return IRQ_NONE;
 		}
 		gp->status = gem_status;
 		gem_disable_ints(gp);
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &gp->napi);
 	}
 
 	spin_unlock_irqrestore(&gp->lock, flags);
@@ -2284,7 +2281,7 @@ static void gem_reset_task(struct work_struct *work)
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(gp->dev);
+	napi_disable(&gp->napi);
 
 	spin_lock_irq(&gp->lock);
 	spin_lock(&gp->tx_lock);
@@ -2307,7 +2304,7 @@ static void gem_reset_task(struct work_struct *work)
 	spin_unlock(&gp->tx_lock);
 	spin_unlock_irq(&gp->lock);
 
-	netif_poll_enable(gp->dev);
+	napi_enable(&gp->napi);
 
 	mutex_unlock(&gp->pm_mutex);
 }
@@ -2334,9 +2331,7 @@ static int gem_close(struct net_device *dev)
 {
 	struct gem *gp = dev->priv;
 
-	/* Note: we don't need to call netif_poll_disable() here because
-	 * our caller (dev_close) already did it for us
-	 */
+	napi_disable(&gp->napi);
 
 	mutex_lock(&gp->pm_mutex);
 
@@ -2358,7 +2353,7 @@ static int gem_suspend(struct pci_dev *pdev, pm_message_t state)
 
 	mutex_lock(&gp->pm_mutex);
 
-	netif_poll_disable(dev);
+	napi_disable(&gp->napi);
 
 	printk(KERN_INFO "%s: suspending, WakeOnLan %s\n",
 	       dev->name,
@@ -2482,7 +2477,7 @@ static int gem_resume(struct pci_dev *pdev)
 	spin_unlock(&gp->tx_lock);
 	spin_unlock_irqrestore(&gp->lock, flags);
 
-	netif_poll_enable(dev);
+	napi_enable(&gp->napi);
 
 	mutex_unlock(&gp->pm_mutex);
 
@@ -3121,8 +3116,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
 	dev->get_stats = gem_get_stats;
 	dev->set_multicast_list = gem_set_multicast;
 	dev->do_ioctl = gem_ioctl;
-	dev->poll = gem_poll;
-	dev->weight = 64;
+	netif_napi_add(dev, &gp->napi, gem_poll, 64);
 	dev->ethtool_ops = &gem_ethtool_ops;
 	dev->tx_timeout = gem_tx_timeout;
 	dev->watchdog_timeo = 5 * HZ;
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index 58cf87c..76d760a 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -993,6 +993,7 @@ struct gem {
 	u32			msg_enable;
 	u32			status;
 
+	struct napi_struct	napi;
 	struct net_device_stats net_stats;
 
 	int			tx_fifo_sz;
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index ec41469..f8d5bca 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -414,6 +414,9 @@ enum tc35815_timer_state {
 struct tc35815_local {
 	struct pci_dev *pci_dev;
 
+	struct net_device *dev;
+	struct napi_struct napi;
+
 	/* statistics */
 	struct net_device_stats stats;
 	struct {
@@ -566,7 +569,7 @@ static int	tc35815_send_packet(struct sk_buff *skb, struct net_device *dev);
 static irqreturn_t	tc35815_interrupt(int irq, void *dev_id);
 #ifdef TC35815_NAPI
 static int	tc35815_rx(struct net_device *dev, int limit);
-static int	tc35815_poll(struct net_device *dev, int *budget);
+static int	tc35815_poll(struct napi_struct *napi, int budget);
 #else
 static void	tc35815_rx(struct net_device *dev);
 #endif
@@ -685,6 +688,7 @@ static int __devinit tc35815_init_one (struct pci_dev *pdev,
 	SET_MODULE_OWNER(dev);
 	SET_NETDEV_DEV(dev, &pdev->dev);
 	lp = dev->priv;
+	lp->dev = dev;
 
 	/* enable device (incl. PCI PM wakeup), and bus-mastering */
 	rc = pci_enable_device (pdev);
@@ -738,8 +742,7 @@ static int __devinit tc35815_init_one (struct pci_dev *pdev,
 	dev->tx_timeout = tc35815_tx_timeout;
 	dev->watchdog_timeo = TC35815_TX_TIMEOUT;
 #ifdef TC35815_NAPI
-	dev->poll = tc35815_poll;
-	dev->weight = NAPI_WEIGHT;
+	netif_napi_add(dev, &lp->napi, tc35815_poll, NAPI_WEIGHT);
 #endif
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	dev->poll_controller = tc35815_poll_controller;
@@ -748,8 +751,6 @@ static int __devinit tc35815_init_one (struct pci_dev *pdev,
 	dev->irq = pdev->irq;
 	dev->base_addr = (unsigned long) ioaddr;
 
-	/* dev->priv/lp zeroed and aligned in alloc_etherdev */
-	lp = dev->priv;
 	spin_lock_init(&lp->lock);
 	lp->pci_dev = pdev;
 	lp->boardtype = ent->driver_data;
@@ -1436,6 +1437,7 @@ static int tc35815_do_interrupt(struct net_device *dev, u32 status)
 static irqreturn_t tc35815_interrupt(int irq, void *dev_id)
 {
 	struct net_device *dev = dev_id;
+	struct tc35815_local *lp = netdev_priv(dev);
 	struct tc35815_regs __iomem *tr =
 		(struct tc35815_regs __iomem *)dev->base_addr;
 #ifdef TC35815_NAPI
@@ -1444,8 +1446,8 @@ static irqreturn_t tc35815_interrupt(int irq, void *dev_id)
 	if (!(dmactl & DMA_IntMask)) {
 		/* disable interrupts */
 		tc_writel(dmactl | DMA_IntMask, &tr->DMA_Ctl);
-		if (netif_rx_schedule_prep(dev))
-			__netif_rx_schedule(dev);
+		if (netif_rx_schedule_prep(dev, &lp->napi))
+			__netif_rx_schedule(dev, &lp->napi);
 		else {
 			printk(KERN_ERR "%s: interrupt taken in poll\n",
 			       dev->name);
@@ -1726,13 +1728,12 @@ tc35815_rx(struct net_device *dev)
 }
 
 #ifdef TC35815_NAPI
-static int
-tc35815_poll(struct net_device *dev, int *budget)
+static int tc35815_poll(struct napi_struct *napi, int budget)
 {
-	struct tc35815_local *lp = dev->priv;
+	struct tc35815_local *lp = container_of(napi, struct tc35815_local, napi);
+	struct net_device *dev = lp->dev;
 	struct tc35815_regs __iomem *tr =
 		(struct tc35815_regs __iomem *)dev->base_addr;
-	int limit = min(*budget, dev->quota);
 	int received = 0, handled;
 	u32 status;
 
@@ -1744,23 +1745,19 @@ tc35815_poll(struct net_device *dev, int *budget)
 		handled = tc35815_do_interrupt(dev, status, limit);
 		if (handled >= 0) {
 			received += handled;
-			limit -= handled;
-			if (limit <= 0)
+			if (received >= budget)
 				break;
 		}
 		status = tc_readl(&tr->Int_Src);
 	} while (status);
 	spin_unlock(&lp->lock);
 
-	dev->quota -= received;
-	*budget -= received;
-	if (limit <= 0)
-		return 1;
-
-	netif_rx_complete(dev);
-	/* enable interrupts */
-	tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl);
-	return 0;
+	if (received < budget) {
+		netif_rx_complete(dev, napi);
+		/* enable interrupts */
+		tc_writel(tc_readl(&tr->DMA_Ctl) & ~DMA_IntMask, &tr->DMA_Ctl);
+	}
+	return received;
 }
 #endif
 
@@ -1949,7 +1946,9 @@ static int
 tc35815_close(struct net_device *dev)
 {
 	struct tc35815_local *lp = dev->priv;
+
 	netif_stop_queue(dev);
+	napi_disable(&lp->napi);
 
 	/* Flush the Tx and disable Rx here. */
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 5874042..05c5077 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -574,7 +574,7 @@ static void tg3_restart_ints(struct tg3 *tp)
 static inline void tg3_netif_stop(struct tg3 *tp)
 {
 	tp->dev->trans_start = jiffies;	/* prevent tx timeout */
-	netif_poll_disable(tp->dev);
+	napi_disable(&tp->napi);
 	netif_tx_disable(tp->dev);
 }
 
@@ -585,7 +585,7 @@ static inline void tg3_netif_start(struct tg3 *tp)
 	 * so long as all callers are assured to have free tx slots
 	 * (such as after tg3_init_hw)
 	 */
-	netif_poll_enable(tp->dev);
+	napi_enable(&tp->napi);
 	tp->hw_status->status |= SD_STATUS_UPDATED;
 	tg3_enable_ints(tp);
 }
@@ -3471,11 +3471,12 @@ next_pkt_nopost:
 	return received;
 }
 
-static int tg3_poll(struct net_device *netdev, int *budget)
+static int tg3_poll(struct napi_struct *napi, int budget)
 {
-	struct tg3 *tp = netdev_priv(netdev);
+	struct tg3 *tp = container_of(napi, struct tg3, napi);
+	struct net_device *netdev = tp->dev;
 	struct tg3_hw_status *sblk = tp->hw_status;
-	int done;
+	int work_done = 0;
 
 	/* handle link change and other phy events */
 	if (!(tp->tg3_flags &
@@ -3494,7 +3495,7 @@ static int tg3_poll(struct net_device *netdev, int *budget)
 	if (sblk->idx[0].tx_consumer != tp->tx_cons) {
 		tg3_tx(tp);
 		if (unlikely(tp->tg3_flags & TG3_FLAG_TX_RECOVERY_PENDING)) {
-			netif_rx_complete(netdev);
+			netif_rx_complete(netdev, napi);
 			schedule_work(&tp->reset_task);
 			return 0;
 		}
@@ -3502,20 +3503,10 @@ static int tg3_poll(struct net_device *netdev, int *budget)
 
 	/* run RX thread, within the bounds set by NAPI.
 	 * All RX "locking" is done by ensuring outside
-	 * code synchronizes with dev->poll()
+	 * code synchronizes with tg3->napi.poll()
 	 */
-	if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr) {
-		int orig_budget = *budget;
-		int work_done;
-
-		if (orig_budget > netdev->quota)
-			orig_budget = netdev->quota;
-
-		work_done = tg3_rx(tp, orig_budget);
-
-		*budget -= work_done;
-		netdev->quota -= work_done;
-	}
+	if (sblk->idx[0].rx_producer != tp->rx_rcb_ptr)
+		work_done = tg3_rx(tp, budget);
 
 	if (tp->tg3_flags & TG3_FLAG_TAGGED_STATUS) {
 		tp->last_tag = sblk->status_tag;
@@ -3524,13 +3515,12 @@ static int tg3_poll(struct net_device *netdev, int *budget)
 		sblk->status &= ~SD_STATUS_UPDATED;
 
 	/* if no more work, tell net stack and NIC we're done */
-	done = !tg3_has_work(tp);
-	if (done) {
-		netif_rx_complete(netdev);
+	if (!tg3_has_work(tp)) {
+		netif_rx_complete(netdev, napi);
 		tg3_restart_ints(tp);
 	}
 
-	return (done ? 0 : 1);
+	return work_done;
 }
 
 static void tg3_irq_quiesce(struct tg3 *tp)
@@ -3577,7 +3567,7 @@ static irqreturn_t tg3_msi_1shot(int irq, void *dev_id)
 	prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(dev);		/* schedule NAPI poll */
+		netif_rx_schedule(dev, &tp->napi);
 
 	return IRQ_HANDLED;
 }
@@ -3602,7 +3592,7 @@ static irqreturn_t tg3_msi(int irq, void *dev_id)
 	 */
 	tw32_mailbox(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (likely(!tg3_irq_sync(tp)))
-		netif_rx_schedule(dev);		/* schedule NAPI poll */
+		netif_rx_schedule(dev, &tp->napi);
 
 	return IRQ_RETVAL(1);
 }
@@ -3644,7 +3634,7 @@ static irqreturn_t tg3_interrupt(int irq, void *dev_id)
 	sblk->status &= ~SD_STATUS_UPDATED;
 	if (likely(tg3_has_work(tp))) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
-		netif_rx_schedule(dev);		/* schedule NAPI poll */
+		netif_rx_schedule(dev, &tp->napi);
 	} else {
 		/* No work, shared interrupt perhaps?  re-enable
 		 * interrupts, and flush that PCI write
@@ -3690,7 +3680,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 	tw32_mailbox_f(MAILBOX_INTERRUPT_0 + TG3_64BIT_REG_LOW, 0x00000001);
 	if (tg3_irq_sync(tp))
 		goto out;
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &tp->napi)) {
 		prefetch(&tp->rx_rcb[tp->rx_rcb_ptr]);
 		/* Update last_tag to mark that this status has been
 		 * seen. Because interrupt may be shared, we may be
@@ -3698,7 +3688,7 @@ static irqreturn_t tg3_interrupt_tagged(int irq, void *dev_id)
 		 * if tg3_poll() is not scheduled.
 		 */
 		tp->last_tag = sblk->status_tag;
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &tp->napi);
 	}
 out:
 	return IRQ_RETVAL(handled);
@@ -3737,7 +3727,7 @@ static int tg3_restart_hw(struct tg3 *tp, int reset_phy)
 		tg3_full_unlock(tp);
 		del_timer_sync(&tp->timer);
 		tp->irq_sync = 0;
-		netif_poll_enable(tp->dev);
+		napi_enable(&tp->napi);
 		dev_close(tp->dev);
 		tg3_full_lock(tp, 0);
 	}
@@ -3932,7 +3922,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	len = skb_headlen(skb);
 
 	/* We are running in BH disabled context with netif_tx_lock
-	 * and TX reclaim runs via tp->poll inside of a software
+	 * and TX reclaim runs via tp->napi.poll inside of a software
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
@@ -4087,7 +4077,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev)
 	len = skb_headlen(skb);
 
 	/* We are running in BH disabled context with netif_tx_lock
-	 * and TX reclaim runs via tp->poll inside of a software
+	 * and TX reclaim runs via tp->napi.poll inside of a software
 	 * interrupt.  Furthermore, IRQ processing runs lockless so we have
 	 * no IRQ context deadlocks to worry about either.  Rejoice!
 	 */
@@ -7456,6 +7446,7 @@ static int tg3_close(struct net_device *dev)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
+	napi_disable(&tp->napi);
 	cancel_work_sync(&tp->reset_task);
 
 	netif_stop_queue(dev);
@@ -11896,9 +11887,8 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	dev->set_mac_address = tg3_set_mac_addr;
 	dev->do_ioctl = tg3_ioctl;
 	dev->tx_timeout = tg3_tx_timeout;
-	dev->poll = tg3_poll;
+	netif_napi_add(dev, &tp->napi, tg3_poll, 64);
 	dev->ethtool_ops = &tg3_ethtool_ops;
-	dev->weight = 64;
 	dev->watchdog_timeo = TG3_TX_TIMEOUT;
 	dev->change_mtu = tg3_change_mtu;
 	dev->irq = pdev->irq;
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 5c21f49..a6a23bb 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -2176,6 +2176,7 @@ struct tg3 {
 	dma_addr_t			tx_desc_mapping;
 
 	/* begin "rx thread" cacheline section */
+	struct napi_struct		napi;
 	void				(*write32_rx_mbox) (struct tg3 *, u32,
 							    u32);
 	u32				rx_rcb_ptr;
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index 1aabc91..781e40b 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -79,6 +79,9 @@ struct tsi108_prv_data {
 	void  __iomem *regs;	/* Base of normal regs */
 	void  __iomem *phyregs;	/* Base of register bank used for PHY access */
 
+	struct net_device *dev;
+	struct napi_struct napi;
+
 	unsigned int phy;		/* Index of PHY for this interface */
 	unsigned int irq_num;
 	unsigned int id;
@@ -837,13 +840,13 @@ static int tsi108_refill_rx(struct net_device *dev, int budget)
 	return done;
 }
 
-static int tsi108_poll(struct net_device *dev, int *budget)
+static int tsi108_poll(struct napi_struct *napi, int budget)
 {
-	struct tsi108_prv_data *data = netdev_priv(dev);
+	struct tsi108_prv_data *data = container_of(napi, struct tsi108_prv_data, napi);
+	struct net_device *dev = data->dev;
 	u32 estat = TSI_READ(TSI108_EC_RXESTAT);
 	u32 intstat = TSI_READ(TSI108_EC_INTSTAT);
-	int total_budget = min(*budget, dev->quota);
-	int num_received = 0, num_filled = 0, budget_used;
+	int num_received = 0, num_filled = 0;
 
 	intstat &= TSI108_INT_RXQUEUE0 | TSI108_INT_RXTHRESH |
 	    TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR | TSI108_INT_RXWAIT;
@@ -852,7 +855,7 @@ static int tsi108_poll(struct net_device *dev, int *budget)
 	TSI_WRITE(TSI108_EC_INTSTAT, intstat);
 
 	if (data->rxpending || (estat & TSI108_EC_RXESTAT_Q0_DESCINT))
-		num_received = tsi108_complete_rx(dev, total_budget);
+		num_received = tsi108_complete_rx(dev, budget);
 
 	/* This should normally fill no more slots than the number of
 	 * packets received in tsi108_complete_rx().  The exception
@@ -867,7 +870,7 @@ static int tsi108_poll(struct net_device *dev, int *budget)
 	 */
 
 	if (data->rxfree < TSI108_RXRING_LEN)
-		num_filled = tsi108_refill_rx(dev, total_budget * 2);
+		num_filled = tsi108_refill_rx(dev, budget * 2);
 
 	if (intstat & TSI108_INT_RXERROR) {
 		u32 err = TSI_READ(TSI108_EC_RXERR);
@@ -890,14 +893,9 @@ static int tsi108_poll(struct net_device *dev, int *budget)
 		spin_unlock_irq(&data->misclock);
 	}
 
-	budget_used = max(num_received, num_filled / 2);
-
-	*budget -= budget_used;
-	dev->quota -= budget_used;
-
-	if (budget_used != total_budget) {
+	if (num_received < budget) {
 		data->rxpending = 0;
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 
 		TSI_WRITE(TSI108_EC_INTMASK,
 				     TSI_READ(TSI108_EC_INTMASK)
@@ -906,14 +904,11 @@ static int tsi108_poll(struct net_device *dev, int *budget)
 					 TSI108_INT_RXOVERRUN |
 					 TSI108_INT_RXERROR |
 					 TSI108_INT_RXWAIT));
-
-		/* IRQs are level-triggered, so no need to re-check */
-		return 0;
 	} else {
 		data->rxpending = 1;
 	}
 
-	return 1;
+	return num_received;
 }
 
 static void tsi108_rx_int(struct net_device *dev)
@@ -931,7 +926,7 @@ static void tsi108_rx_int(struct net_device *dev)
 	 * from tsi108_check_rxring().
 	 */
 
-	if (netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &data->napi)) {
 		/* Mask, rather than ack, the receive interrupts.  The ack
 		 * will happen in tsi108_poll().
 		 */
@@ -942,7 +937,7 @@ static void tsi108_rx_int(struct net_device *dev)
 				     | TSI108_INT_RXTHRESH |
 				     TSI108_INT_RXOVERRUN | TSI108_INT_RXERROR |
 				     TSI108_INT_RXWAIT);
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &data->napi);
 	} else {
 		if (!netif_running(dev)) {
 			/* This can happen if an interrupt occurs while the
@@ -1425,6 +1420,7 @@ static int tsi108_close(struct net_device *dev)
 	struct tsi108_prv_data *data = netdev_priv(dev);
 
 	netif_stop_queue(dev);
+	napi_disable(&data->napi);
 
 	del_timer_sync(&data->timer);
 
@@ -1562,6 +1558,7 @@ tsi108_init_one(struct platform_device *pdev)
 
 	printk("tsi108_eth%d: probe...\n", pdev->id);
 	data = netdev_priv(dev);
+	data->dev = dev;
 
 	pr_debug("tsi108_eth%d:regs:phyresgs:phy:irq_num=0x%x:0x%x:0x%x:0x%x\n",
 			pdev->id, einfo->regs, einfo->phyregs,
@@ -1597,9 +1594,8 @@ tsi108_init_one(struct platform_device *pdev)
 	dev->set_mac_address = tsi108_set_mac;
 	dev->set_multicast_list = tsi108_set_rx_mode;
 	dev->get_stats = tsi108_get_stats;
-	dev->poll = tsi108_poll;
+	netif_napi_add(dev, &data->napi, tsi108_poll, 64);
 	dev->do_ioctl = tsi108_do_ioctl;
-	dev->weight = 64;  /* 64 is more suitable for GigE interface - klai */
 
 	/* Apparently, the Linux networking code won't use scatter-gather
 	 * if the hardware doesn't do checksums.  However, it's faster
diff --git a/drivers/net/tulip/interrupt.c b/drivers/net/tulip/interrupt.c
index 53efd66..3653314 100644
--- a/drivers/net/tulip/interrupt.c
+++ b/drivers/net/tulip/interrupt.c
@@ -103,28 +103,29 @@ int tulip_refill_rx(struct net_device *dev)
 void oom_timer(unsigned long data)
 {
         struct net_device *dev = (struct net_device *)data;
-	netif_rx_schedule(dev);
+	struct tulip_private *tp = netdev_priv(dev);
+	netif_rx_schedule(dev, &tp->napi);
 }
 
-int tulip_poll(struct net_device *dev, int *budget)
+int tulip_poll(struct napi_struct *napi, int budget)
 {
-	struct tulip_private *tp = netdev_priv(dev);
+	struct tulip_private *tp = container_of(napi, struct tulip_private, napi);
+	struct net_device *dev = tp->dev;
 	int entry = tp->cur_rx % RX_RING_SIZE;
-	int rx_work_limit = *budget;
+	int work_done = 0;
+#ifdef CONFIG_TULIP_NAPI_HW_MITIGATION
 	int received = 0;
+#endif
 
 	if (!netif_running(dev))
 		goto done;
 
-	if (rx_work_limit > dev->quota)
-		rx_work_limit = dev->quota;
-
 #ifdef CONFIG_TULIP_NAPI_HW_MITIGATION
 
 /* that one buffer is needed for mit activation; or might be a
    bug in the ring buffer code; check later -- JHS*/
 
-        if (rx_work_limit >=RX_RING_SIZE) rx_work_limit--;
+        if (budget >=RX_RING_SIZE) budget--;
 #endif
 
 	if (tulip_debug > 4)
@@ -144,14 +145,13 @@ int tulip_poll(struct net_device *dev, int *budget)
                while ( ! (tp->rx_ring[entry].status & cpu_to_le32(DescOwned))) {
                        s32 status = le32_to_cpu(tp->rx_ring[entry].status);
 
-
                        if (tp->dirty_rx + RX_RING_SIZE == tp->cur_rx)
                                break;
 
                        if (tulip_debug > 5)
                                printk(KERN_DEBUG "%s: In tulip_rx(), entry %d %8.8x.\n",
                                       dev->name, entry, status);
-                       if (--rx_work_limit < 0)
+		       if (++work_done >= budget)
                                goto not_done;
 
                        if ((status & 0x38008300) != 0x0300) {
@@ -238,7 +238,9 @@ int tulip_poll(struct net_device *dev, int *budget)
                                tp->stats.rx_packets++;
                                tp->stats.rx_bytes += pkt_len;
                        }
-                       received++;
+#ifdef CONFIG_TULIP_NAPI_HW_MITIGATION
+		       received++;
+#endif
 
                        entry = (++tp->cur_rx) % RX_RING_SIZE;
                        if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/4)
@@ -296,17 +298,15 @@ done:
 
 #endif /* CONFIG_TULIP_NAPI_HW_MITIGATION */
 
-         dev->quota -= received;
-         *budget -= received;
-
          tulip_refill_rx(dev);
 
          /* If RX ring is not full we are out of memory. */
-         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom;
+         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
+		 goto oom;
 
          /* Remove us from polling list and enable RX intr. */
 
-         netif_rx_complete(dev);
+         netif_rx_complete(dev, napi);
          iowrite32(tulip_tbl[tp->chip_id].valid_intrs, tp->base_addr+CSR7);
 
          /* The last op happens after poll completion. Which means the following:
@@ -320,28 +320,20 @@ done:
           * processed irqs. But it must not result in losing events.
           */
 
-         return 0;
+         return work_done;
 
  not_done:
-         if (!received) {
-
-                 received = dev->quota; /* Not to happen */
-         }
-         dev->quota -= received;
-         *budget -= received;
-
          if (tp->cur_rx - tp->dirty_rx > RX_RING_SIZE/2 ||
              tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
                  tulip_refill_rx(dev);
 
-         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL) goto oom;
-
-         return 1;
+         if (tp->rx_buffers[tp->dirty_rx % RX_RING_SIZE].skb == NULL)
+		 goto oom;
 
+         return work_done;
 
  oom:    /* Executed with RX ints disabled */
 
-
          /* Start timer, stop polling, but do not enable rx interrupts. */
          mod_timer(&tp->oom_timer, jiffies+1);
 
@@ -350,9 +342,9 @@ done:
           * before we did netif_rx_complete(). See? We would lose it. */
 
          /* remove ourselves from the polling list */
-         netif_rx_complete(dev);
+         netif_rx_complete(dev, napi);
 
-         return 0;
+         return work_done;
 }
 
 #else /* CONFIG_TULIP_NAPI */
@@ -534,7 +526,7 @@ irqreturn_t tulip_interrupt(int irq, void *dev_instance)
 			rxd++;
 			/* Mask RX intrs and add the device to poll list. */
 			iowrite32(tulip_tbl[tp->chip_id].valid_intrs&~RxPollInt, ioaddr + CSR7);
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &tp->napi);
 
 			if (!(csr5&~(AbnormalIntr|NormalIntr|RxPollInt|TPLnkPass)))
                                break;
diff --git a/drivers/net/tulip/tulip.h b/drivers/net/tulip/tulip.h
index 16f26a8..5a4d727 100644
--- a/drivers/net/tulip/tulip.h
+++ b/drivers/net/tulip/tulip.h
@@ -353,6 +353,7 @@ struct tulip_private {
 	int chip_id;
 	int revision;
 	int flags;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 	struct timer_list timer;	/* Media selection timer. */
 	struct timer_list oom_timer;    /* Out of memory timer. */
@@ -429,7 +430,7 @@ extern int tulip_rx_copybreak;
 irqreturn_t tulip_interrupt(int irq, void *dev_instance);
 int tulip_refill_rx(struct net_device *dev);
 #ifdef CONFIG_TULIP_NAPI
-int tulip_poll(struct net_device *dev, int *budget);
+int tulip_poll(struct napi_struct *napi, int budget);
 #endif
 
 
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index eca984f..da05a3a 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -769,6 +769,7 @@ static int tulip_close (struct net_device *dev)
 	int i;
 
 	netif_stop_queue (dev);
+	napi_disable(&tp->napi);
 
 	tulip_down (dev);
 
@@ -1606,8 +1607,7 @@ static int __devinit tulip_init_one (struct pci_dev *pdev,
 	dev->tx_timeout = tulip_tx_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
 #ifdef CONFIG_TULIP_NAPI
-	dev->poll = tulip_poll;
-	dev->weight = 16;
+	netif_napi_add(dev, &tp->napi, tulip_poll, 16);
 #endif
 	dev->stop = tulip_close;
 	dev->get_stats = tulip_get_stats;
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index 0358720..842160f 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -284,6 +284,7 @@ struct typhoon {
 	struct basic_ring	rxLoRing;
 	struct pci_dev *	pdev;
 	struct net_device *	dev;
+	struct napi_struct	napi;
 	spinlock_t		state_lock;
 	struct vlan_group *	vlgrp;
 	struct basic_ring	rxHiRing;
@@ -1759,12 +1760,12 @@ typhoon_fill_free_ring(struct typhoon *tp)
 }
 
 static int
-typhoon_poll(struct net_device *dev, int *total_budget)
+typhoon_poll(struct napi_struct *napi, int budget)
 {
-	struct typhoon *tp = netdev_priv(dev);
+	struct typhoon *tp = container_of(napi, struct typhoon, napi);
+	struct net_device *dev = tp->dev;
 	struct typhoon_indexes *indexes = tp->indexes;
-	int orig_budget = *total_budget;
-	int budget, work_done, done;
+	int work_done;
 
 	rmb();
 	if(!tp->awaiting_resp && indexes->respReady != indexes->respCleared)
@@ -1773,30 +1774,16 @@ typhoon_poll(struct net_device *dev, int *total_budget)
 	if(le32_to_cpu(indexes->txLoCleared) != tp->txLoRing.lastRead)
 		typhoon_tx_complete(tp, &tp->txLoRing, &indexes->txLoCleared);
 
-	if(orig_budget > dev->quota)
-		orig_budget = dev->quota;
-
-	budget = orig_budget;
 	work_done = 0;
-	done = 1;
 
 	if(indexes->rxHiCleared != indexes->rxHiReady) {
-		work_done = typhoon_rx(tp, &tp->rxHiRing, &indexes->rxHiReady,
+		work_done += typhoon_rx(tp, &tp->rxHiRing, &indexes->rxHiReady,
 			   		&indexes->rxHiCleared, budget);
-		budget -= work_done;
 	}
 
 	if(indexes->rxLoCleared != indexes->rxLoReady) {
 		work_done += typhoon_rx(tp, &tp->rxLoRing, &indexes->rxLoReady,
-			   		&indexes->rxLoCleared, budget);
-	}
-
-	if(work_done) {
-		*total_budget -= work_done;
-		dev->quota -= work_done;
-
-		if(work_done >= orig_budget)
-			done = 0;
+			   		&indexes->rxLoCleared, budget - work_done);
 	}
 
 	if(le32_to_cpu(indexes->rxBuffCleared) == tp->rxBuffRing.lastWrite) {
@@ -1804,14 +1791,14 @@ typhoon_poll(struct net_device *dev, int *total_budget)
 		typhoon_fill_free_ring(tp);
 	}
 
-	if(done) {
-		netif_rx_complete(dev);
+	if (work_done < budget) {
+		netif_rx_complete(dev, napi);
 		iowrite32(TYPHOON_INTR_NONE,
 				tp->ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(tp->ioaddr);
 	}
 
-	return (done ? 0 : 1);
+	return work_done;
 }
 
 static irqreturn_t
@@ -1828,10 +1815,10 @@ typhoon_interrupt(int irq, void *dev_instance)
 
 	iowrite32(intr_status, ioaddr + TYPHOON_REG_INTR_STATUS);
 
-	if(netif_rx_schedule_prep(dev)) {
+	if (netif_rx_schedule_prep(dev, &tp->napi)) {
 		iowrite32(TYPHOON_INTR_ALL, ioaddr + TYPHOON_REG_INTR_MASK);
 		typhoon_post_pci_writes(ioaddr);
-		__netif_rx_schedule(dev);
+		__netif_rx_schedule(dev, &tp->napi);
 	} else {
 		printk(KERN_ERR "%s: Error, poll already scheduled\n",
                        dev->name);
@@ -2150,6 +2137,7 @@ typhoon_close(struct net_device *dev)
 	struct typhoon *tp = netdev_priv(dev);
 
 	netif_stop_queue(dev);
+	napi_disable(&tp->napi);
 
 	if(typhoon_stop_runtime(tp, WaitSleep) < 0)
 		printk(KERN_ERR "%s: unable to stop runtime\n", dev->name);
@@ -2521,8 +2509,7 @@ typhoon_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->stop		= typhoon_close;
 	dev->set_multicast_list	= typhoon_set_rx_mode;
 	dev->tx_timeout		= typhoon_tx_timeout;
-	dev->poll		= typhoon_poll;
-	dev->weight		= 16;
+	netif_napi_add(dev, &tp->napi, typhoon_poll, 16);
 	dev->watchdog_timeo	= TX_TIMEOUT;
 	dev->get_stats		= typhoon_get_stats;
 	dev->set_mac_address	= typhoon_set_mac_address;
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 12e01b2..1512ab9 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -3582,41 +3582,31 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
 }
 
 #ifdef CONFIG_UGETH_NAPI
-static int ucc_geth_poll(struct net_device *dev, int *budget)
+static int ucc_geth_poll(struct napi_struct *napi, int budget)
 {
-	struct ucc_geth_private *ugeth = netdev_priv(dev);
+	struct ucc_geth_private *ugeth = container_of(napi, struct ucc_geth_private, napi);
+	struct net_device *dev = ugeth->dev;
 	struct ucc_geth_info *ug_info;
-	struct ucc_fast_private *uccf;
-	int howmany;
-	u8 i;
-	int rx_work_limit;
-	register u32 uccm;
+	int howmany, i;
 
 	ug_info = ugeth->ug_info;
 
-	rx_work_limit = *budget;
-	if (rx_work_limit > dev->quota)
-		rx_work_limit = dev->quota;
-
 	howmany = 0;
+	for (i = 0; i < ug_info->numQueuesRx; i++)
+		howmany += ucc_geth_rx(ugeth, i, budget - howmany);
 
-	for (i = 0; i < ug_info->numQueuesRx; i++) {
-		howmany += ucc_geth_rx(ugeth, i, rx_work_limit);
-	}
-
-	dev->quota -= howmany;
-	rx_work_limit -= howmany;
-	*budget -= howmany;
+	if (howmany < budget) {
+		struct ucc_fast_private *uccf;
+		u32 uccm;
 
-	if (rx_work_limit > 0) {
-		netif_rx_complete(dev);
+		netif_rx_complete(dev, napi);
 		uccf = ugeth->uccf;
 		uccm = in_be32(uccf->p_uccm);
 		uccm |= UCCE_RX_EVENTS;
 		out_be32(uccf->p_uccm, uccm);
 	}
 
-	return (rx_work_limit > 0) ? 0 : 1;
+	return howmany;
 }
 #endif				/* CONFIG_UGETH_NAPI */
 
@@ -3651,10 +3641,10 @@ static irqreturn_t ucc_geth_irq_handler(int irq, void *info)
 	/* check for receive events that require processing */
 	if (ucce & UCCE_RX_EVENTS) {
 #ifdef CONFIG_UGETH_NAPI
-		if (netif_rx_schedule_prep(dev)) {
-		uccm &= ~UCCE_RX_EVENTS;
+		if (netif_rx_schedule_prep(dev, &ugeth->napi)) {
+			uccm &= ~UCCE_RX_EVENTS;
 			out_be32(uccf->p_uccm, uccm);
-			__netif_rx_schedule(dev);
+			__netif_rx_schedule(dev, &ugeth->napi);
 		}
 #else
 		rx_mask = UCCE_RXBF_SINGLE_MASK;
@@ -3784,6 +3774,8 @@ static int ucc_geth_close(struct net_device *dev)
 
 	ugeth_vdbg("%s: IN", __FUNCTION__);
 
+	napi_disable(&ugeth->napi);
+
 	ucc_geth_stop(ugeth);
 
 	phy_disconnect(ugeth->phydev);
@@ -3964,8 +3956,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
 	dev->tx_timeout = ucc_geth_timeout;
 	dev->watchdog_timeo = TX_TIMEOUT;
 #ifdef CONFIG_UGETH_NAPI
-	dev->poll = ucc_geth_poll;
-	dev->weight = UCC_GETH_DEV_WEIGHT;
+	netif_napi_add(dev, &ugeth->napi, ucc_geth_poll, UCC_GETH_DEV_WEIGHT);
 #endif				/* CONFIG_UGETH_NAPI */
 	dev->stop = ucc_geth_close;
 	dev->get_stats = ucc_geth_get_stats;
diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h
index bb4dac8..0579ba0 100644
--- a/drivers/net/ucc_geth.h
+++ b/drivers/net/ucc_geth.h
@@ -1184,6 +1184,7 @@ struct ucc_geth_private {
 	struct ucc_geth_info *ug_info;
 	struct ucc_fast_private *uccf;
 	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;	/* linux network statistics */
 	struct ucc_geth *ug_regs;
 	struct ucc_geth_init_pram *p_init_enet_param_shadow;
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index c3fe230..6af7925 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -383,6 +383,8 @@ struct rhine_private {
 
 	struct pci_dev *pdev;
 	long pioaddr;
+	struct net_device *dev;
+	struct napi_struct napi;
 	struct net_device_stats stats;
 	spinlock_t lock;
 
@@ -576,28 +578,25 @@ static void rhine_poll(struct net_device *dev)
 #endif
 
 #ifdef CONFIG_VIA_RHINE_NAPI
-static int rhine_napipoll(struct net_device *dev, int *budget)
+static int rhine_napipoll(struct napi_struct *napi, int budget)
 {
-	struct rhine_private *rp = netdev_priv(dev);
+	struct rhine_private *rp = container_of(napi, struct rhine_private, napi);
+	struct net_device *dev = rp->dev;
 	void __iomem *ioaddr = rp->base;
-	int done, limit = min(dev->quota, *budget);
+	int work_done;
 
-	done = rhine_rx(dev, limit);
-	*budget -= done;
-	dev->quota -= done;
+	work_done = rhine_rx(dev, budget);
 
-	if (done < limit) {
-		netif_rx_complete(dev);
+	if (work_done < budget) {
+		netif_rx_complete(dev, napi);
 
 		iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
 			  IntrRxDropped | IntrRxNoBuf | IntrTxAborted |
 			  IntrTxDone | IntrTxError | IntrTxUnderrun |
 			  IntrPCIErr | IntrStatsMax | IntrLinkChange,
 			  ioaddr + IntrEnable);
-		return 0;
 	}
-	else
-		return 1;
+	return work_done;
 }
 #endif
 
@@ -701,6 +700,7 @@ static int __devinit rhine_init_one(struct pci_dev *pdev,
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	rp = netdev_priv(dev);
+	rp->dev = dev;
 	rp->quirks = quirks;
 	rp->pioaddr = pioaddr;
 	rp->pdev = pdev;
@@ -779,8 +779,7 @@ static int __devinit rhine_init_one(struct pci_dev *pdev,
 	dev->poll_controller = rhine_poll;
 #endif
 #ifdef CONFIG_VIA_RHINE_NAPI
-	dev->poll = rhine_napipoll;
-	dev->weight = 64;
+	netif_napi_add(dev, &rp->napi, rhine_napipoll, 64);
 #endif
 	if (rp->quirks & rqRhineI)
 		dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
@@ -1055,7 +1054,7 @@ static void init_registers(struct net_device *dev)
 
 	rhine_set_rx_mode(dev);
 
-	netif_poll_enable(dev);
+	napi_enable(&rp->napi);
 
 	/* Enable interrupts by setting the interrupt mask. */
 	iowrite16(IntrRxDone | IntrRxErr | IntrRxEmpty| IntrRxOverflow |
@@ -1318,7 +1317,7 @@ static irqreturn_t rhine_interrupt(int irq, void *dev_instance)
 				  IntrPCIErr | IntrStatsMax | IntrLinkChange,
 				  ioaddr + IntrEnable);
 
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &rp->napi);
 #else
 			rhine_rx(dev, RX_RING_SIZE);
 #endif
@@ -1831,7 +1830,7 @@ static int rhine_close(struct net_device *dev)
 	spin_lock_irq(&rp->lock);
 
 	netif_stop_queue(dev);
-	netif_poll_disable(dev);
+	napi_disable(&rp->napi);
 
 	if (debug > 1)
 		printk(KERN_DEBUG "%s: Shutting down ethercard, "
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 489f69c..ec710eb 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -72,6 +72,7 @@ struct netfront_info {
 	struct list_head list;
 	struct net_device *netdev;
 
+	struct napi_struct napi;
 	struct net_device_stats stats;
 
 	struct xen_netif_tx_front_ring tx;
@@ -185,7 +186,8 @@ static int xennet_can_sg(struct net_device *dev)
 static void rx_refill_timeout(unsigned long data)
 {
 	struct net_device *dev = (struct net_device *)data;
-	netif_rx_schedule(dev);
+	struct netfront_info *np = netdev_priv(dev);
+	netif_rx_schedule(dev, &np->napi);
 }
 
 static int netfront_tx_slot_available(struct netfront_info *np)
@@ -347,7 +349,7 @@ static int xennet_open(struct net_device *dev)
 		xennet_alloc_rx_buffers(dev);
 		np->rx.sring->rsp_event = np->rx.rsp_cons + 1;
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &np->napi);
 	}
 	spin_unlock_bh(&np->rx_lock);
 
@@ -588,6 +590,7 @@ static int xennet_close(struct net_device *dev)
 {
 	struct netfront_info *np = netdev_priv(dev);
 	netif_stop_queue(np->netdev);
+	napi_disable(&np->napi);
 	return 0;
 }
 
@@ -871,15 +874,16 @@ static int handle_incoming_queue(struct net_device *dev,
 	return packets_dropped;
 }
 
-static int xennet_poll(struct net_device *dev, int *pbudget)
+static int xennet_poll(struct napi_struct *napi, int budget)
 {
-	struct netfront_info *np = netdev_priv(dev);
+	struct netfront_info *np = container_of(napi, struct netfront_info, napi);
+	struct net_device *dev = np->netdev;
 	struct sk_buff *skb;
 	struct netfront_rx_info rinfo;
 	struct xen_netif_rx_response *rx = &rinfo.rx;
 	struct xen_netif_extra_info *extras = rinfo.extras;
 	RING_IDX i, rp;
-	int work_done, budget, more_to_do = 1;
+	int work_done;
 	struct sk_buff_head rxq;
 	struct sk_buff_head errq;
 	struct sk_buff_head tmpq;
@@ -898,9 +902,6 @@ static int xennet_poll(struct net_device *dev, int *pbudget)
 	skb_queue_head_init(&errq);
 	skb_queue_head_init(&tmpq);
 
-	budget = *pbudget;
-	if (budget > dev->quota)
-		budget = dev->quota;
 	rp = np->rx.sring->rsp_prod;
 	rmb(); /* Ensure we see queued responses up to 'rp'. */
 
@@ -1005,22 +1006,21 @@ err:
 
 	xennet_alloc_rx_buffers(dev);
 
-	*pbudget   -= work_done;
-	dev->quota -= work_done;
-
 	if (work_done < budget) {
+		int more_to_do = 0;
+
 		local_irq_save(flags);
 
 		RING_FINAL_CHECK_FOR_RESPONSES(&np->rx, more_to_do);
 		if (!more_to_do)
-			__netif_rx_complete(dev);
+			__netif_rx_complete(dev, napi);
 
 		local_irq_restore(flags);
 	}
 
 	spin_unlock(&np->rx_lock);
 
-	return more_to_do;
+	return work_done;
 }
 
 static int xennet_change_mtu(struct net_device *dev, int mtu)
@@ -1200,10 +1200,9 @@ static struct net_device * __devinit xennet_create_dev(struct xenbus_device *dev
 	netdev->hard_start_xmit = xennet_start_xmit;
 	netdev->stop            = xennet_close;
 	netdev->get_stats       = xennet_get_stats;
-	netdev->poll            = xennet_poll;
+	netif_napi_add(netdev, &np->napi, xennet_poll, 64);
 	netdev->uninit          = xennet_uninit;
 	netdev->change_mtu	= xennet_change_mtu;
-	netdev->weight          = 64;
 	netdev->features        = NETIF_F_IP_CSUM;
 
 	SET_ETHTOOL_OPS(netdev, &xennet_ethtool_ops);
@@ -1348,7 +1347,7 @@ static irqreturn_t xennet_interrupt(int irq, void *dev_id)
 		xennet_tx_buf_gc(dev);
 		/* Under tx_lock: protects access to rx shared-ring indexes. */
 		if (RING_HAS_UNCONSUMED_RESPONSES(&np->rx))
-			netif_rx_schedule(dev);
+			netif_rx_schedule(dev, &np->napi);
 	}
 
 	spin_unlock_irqrestore(&np->tx_lock, flags);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4a616d7..bcfd1da 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -31,6 +31,7 @@
 
 #ifdef __KERNEL__
 #include <linux/timer.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 #include <asm/cache.h>
 #include <asm/byteorder.h>
@@ -38,6 +39,7 @@
 #include <linux/device.h>
 #include <linux/percpu.h>
 #include <linux/dmaengine.h>
+#include <linux/workqueue.h>
 
 struct vlan_group;
 struct ethtool_ops;
@@ -258,7 +260,6 @@ enum netdev_state_t
 	__LINK_STATE_PRESENT,
 	__LINK_STATE_SCHED,
 	__LINK_STATE_NOCARRIER,
-	__LINK_STATE_RX_SCHED,
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
 	__LINK_STATE_QDISC_RUNNING,
@@ -278,6 +279,61 @@ struct netdev_boot_setup {
 extern int __init netdev_boot_setup(char *str);
 
 /*
+ * Structure for NAPI scheduling similar to tasklet but with weighting
+ */
+struct napi_struct {
+	struct list_head	poll_list;
+	unsigned long		state;
+	int			weight;
+	int			quota;
+	int			(*poll)(struct napi_struct *, int);
+#ifdef CONFIG_NETPOLL
+	spinlock_t		poll_lock;
+	int			poll_owner;
+	struct net_device	*dev;
+	struct list_head	dev_list;
+#endif
+};
+
+enum
+{
+	NAPI_STATE_SCHED,	/* Poll is scheduled */
+};
+
+extern void FASTCALL(__napi_schedule(struct napi_struct *n));
+
+static inline int napi_schedule_prep(struct napi_struct *n)
+{
+	return !test_and_set_bit(NAPI_STATE_SCHED, &n->state);
+}
+
+static inline void napi_schedule(struct napi_struct *n)
+{
+	if (napi_schedule_prep(n))
+		__napi_schedule(n);
+}
+
+static inline void napi_complete(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	smp_mb__before_clear_bit();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+
+static inline void napi_disable(struct napi_struct *n)
+{
+	while (test_and_set_bit(NAPI_STATE_SCHED, &n->state))
+		msleep_interruptible(1);
+}
+
+static inline void napi_enable(struct napi_struct *n)
+{
+	BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+	smp_mb__before_clear_bit();
+	clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+
+/*
  *	The DEVICE structure.
  *	Actually, this whole structure is a big mistake.  It mixes I/O
  *	data with strictly "high-level" data, and it has to know about
@@ -319,6 +375,9 @@ struct net_device
 	unsigned long		state;
 
 	struct list_head	dev_list;
+#ifdef CONFIG_NETPOLL
+	struct list_head	napi_list;
+#endif
 	
 	/* The device initialization function. Called only once. */
 	int			(*init)(struct net_device *dev);
@@ -430,12 +489,6 @@ struct net_device
 /*
  * Cache line mostly used on receive path (including eth_type_trans())
  */
-	struct list_head	poll_list ____cacheline_aligned_in_smp;
-					/* Link to poll list	*/
-
-	int			(*poll) (struct net_device *dev, int *quota);
-	int			quota;
-	int			weight;
 	unsigned long		last_rx;	/* Time of last Rx	*/
 	/* Interface address info used in eth_type_trans() */
 	unsigned char		dev_addr[MAX_ADDR_LEN];	/* hw address, (before bcast 
@@ -593,6 +646,22 @@ static inline void *netdev_priv(const struct net_device *dev)
  */
 #define SET_NETDEV_DEV(net, pdev)	((net)->dev.parent = (pdev))
 
+static inline void netif_napi_add(struct net_device *dev,
+				  struct napi_struct *napi,
+				  int (*poll)(struct napi_struct *, int),
+				  int weight)
+{
+	INIT_LIST_HEAD(&napi->poll_list);
+	napi->poll = poll;
+	napi->weight = weight;
+#ifdef CONFIG_NETPOLL
+	napi->dev = dev;
+	list_add(&napi->dev_list, &dev->napi_list);
+	spin_lock_init(&napi->poll_lock);
+	napi->poll_owner = -1;
+#endif
+}
+
 struct packet_type {
 	__be16			type;	/* This is really htons(ether_type). */
 	struct net_device	*dev;	/* NULL is wildcarded here	     */
@@ -678,7 +747,6 @@ static inline int unregister_gifconf(unsigned int family)
  * Incoming packets are placed on per-cpu queues so that
  * no locking is needed.
  */
-
 struct softnet_data
 {
 	struct net_device	*output_queue;
@@ -686,7 +754,7 @@ struct softnet_data
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
-	struct net_device	backlog_dev;	/* Sorry. 8) */
+	struct napi_struct	backlog;
 #ifdef CONFIG_NET_DMA
 	struct dma_chan		*net_dma;
 #endif
@@ -796,20 +864,7 @@ static inline int netif_is_multiqueue(const struct net_device *dev)
 /* Use this variant when it is known for sure that it
  * is executing from interrupt context.
  */
-static inline void dev_kfree_skb_irq(struct sk_buff *skb)
-{
-	if (atomic_dec_and_test(&skb->users)) {
-		struct softnet_data *sd;
-		unsigned long flags;
-
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		skb->next = sd->completion_queue;
-		sd->completion_queue = skb;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
-}
+extern void dev_kfree_skb_irq(struct sk_buff *skb);
 
 /* Use this variant in places where it could be invoked
  * either from interrupt or non-interrupt context.
@@ -955,60 +1010,58 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	return (1 << debug_value) - 1;
 }
 
-/* Test if receive needs to be scheduled */
-static inline int __netif_rx_schedule_prep(struct net_device *dev)
+/* Try to reschedule poll. Called by napi->poll() after netif_rx_complete().
+ * Do not inline this?
+ */
+static inline int netif_rx_reschedule(struct napi_struct *n)
 {
-	return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	if (napi_schedule_prep(n)) {
+		unsigned long flags;
+
+		local_irq_save(flags);
+		list_add_tail(&n->poll_list,
+			      &__get_cpu_var(softnet_data).poll_list);
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		local_irq_restore(flags);
+		return 1;
+	}
+	return 0;
 }
 
 /* Test if receive needs to be scheduled but only if up */
-static inline int netif_rx_schedule_prep(struct net_device *dev)
+static inline int netif_rx_schedule_prep(struct net_device *dev,
+					 struct napi_struct *napi)
 {
-	return netif_running(dev) && __netif_rx_schedule_prep(dev);
+	return netif_running(dev) && napi_schedule_prep(napi);
 }
 
 /* Add interface to tail of rx poll list. This assumes that _prep has
  * already been called and returned 1.
  */
-
-extern void __netif_rx_schedule(struct net_device *dev);
-
-/* Try to reschedule poll. Called by irq handler. */
-
-static inline void netif_rx_schedule(struct net_device *dev)
+static inline void __netif_rx_schedule(struct net_device *dev,
+				       struct napi_struct *napi)
 {
-	if (netif_rx_schedule_prep(dev))
-		__netif_rx_schedule(dev);
+	dev_hold(dev);
+	__napi_schedule(napi);
 }
 
-/* Try to reschedule poll. Called by dev->poll() after netif_rx_complete().
- * Do not inline this?
- */
-static inline int netif_rx_reschedule(struct net_device *dev, int undo)
-{
-	if (netif_rx_schedule_prep(dev)) {
-		unsigned long flags;
-
-		dev->quota += undo;
+/* Try to reschedule poll. Called by irq handler. */
 
-		local_irq_save(flags);
-		list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-		local_irq_restore(flags);
-		return 1;
-	}
-	return 0;
+static inline void netif_rx_schedule(struct net_device *dev,
+				     struct napi_struct *napi)
+{
+	if (netif_rx_schedule_prep(dev, napi))
+		__netif_rx_schedule(dev, napi);
 }
 
 /* same as netif_rx_complete, except that local_irq_save(flags)
  * has already been issued
  */
-static inline void __netif_rx_complete(struct net_device *dev)
+static inline void __netif_rx_complete(struct net_device *dev,
+				       struct napi_struct *napi)
 {
-	BUG_ON(!test_bit(__LINK_STATE_RX_SCHED, &dev->state));
-	list_del(&dev->poll_list);
-	smp_mb__before_clear_bit();
-	clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	napi_complete(napi);
+	dev_put(dev);
 }
 
 /* Remove interface from poll list: it must be in the poll list
@@ -1016,28 +1069,16 @@ static inline void __netif_rx_complete(struct net_device *dev)
  * it completes the work. The device cannot be out of poll list at this
  * moment, it is BUG().
  */
-static inline void netif_rx_complete(struct net_device *dev)
+static inline void netif_rx_complete(struct net_device *dev,
+				     struct napi_struct *napi)
 {
 	unsigned long flags;
 
 	local_irq_save(flags);
-	__netif_rx_complete(dev);
+	__netif_rx_complete(dev, napi);
 	local_irq_restore(flags);
 }
 
-static inline void netif_poll_disable(struct net_device *dev)
-{
-	while (test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state))
-		/* No hurry. */
-		schedule_timeout_interruptible(1);
-}
-
-static inline void netif_poll_enable(struct net_device *dev)
-{
-	smp_mb__before_clear_bit();
-	clear_bit(__LINK_STATE_RX_SCHED, &dev->state);
-}
-
 static inline void netif_tx_lock(struct net_device *dev)
 {
 	spin_lock(&dev->_xmit_lock);
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 29930b7..4535ba3 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -25,8 +25,6 @@ struct netpoll {
 
 struct netpoll_info {
 	atomic_t refcnt;
-	spinlock_t poll_lock;
-	int poll_owner;
 	int rx_flags;
 	spinlock_t rx_lock;
 	struct netpoll *rx_np; /* netpoll that registered an rx_hook */
@@ -64,32 +62,61 @@ static inline int netpoll_rx(struct sk_buff *skb)
 	return ret;
 }
 
-static inline void *netpoll_poll_lock(struct net_device *dev)
+static inline int netpoll_receive_skb(struct sk_buff *skb)
 {
+	if (!list_empty(&skb->dev->napi_list))
+		return netpoll_rx(skb);
+	return 0;
+}
+
+static inline void *netpoll_poll_lock(struct napi_struct *napi)
+{
+	struct net_device *dev = napi->dev;
+
 	rcu_read_lock(); /* deal with race on ->npinfo */
 	if (dev->npinfo) {
-		spin_lock(&dev->npinfo->poll_lock);
-		dev->npinfo->poll_owner = smp_processor_id();
-		return dev->npinfo;
+		spin_lock(&napi->poll_lock);
+		napi->poll_owner = smp_processor_id();
+		return napi;
 	}
 	return NULL;
 }
 
 static inline void netpoll_poll_unlock(void *have)
 {
-	struct netpoll_info *npi = have;
+	struct napi_struct *napi = have;
 
-	if (npi) {
-		npi->poll_owner = -1;
-		spin_unlock(&npi->poll_lock);
+	if (napi) {
+		napi->poll_owner = -1;
+		spin_unlock(&napi->poll_lock);
 	}
 	rcu_read_unlock();
 }
 
+static inline void netpoll_netdev_init(struct net_device *dev)
+{
+	INIT_LIST_HEAD(&dev->napi_list);
+}
+
 #else
-#define netpoll_rx(a) 0
-#define netpoll_poll_lock(a) NULL
-#define netpoll_poll_unlock(a)
+static inline int netpoll_rx(struct sk_buff *skb)
+{
+	return 0;
+}
+static inline int netpoll_receive_skb(struct sk_buff *skb)
+{
+	return 0;
+}
+static inline void *netpoll_poll_lock(struct napi_struct *napi)
+{
+	return NULL;
+}
+static inline void netpoll_poll_unlock(void *have)
+{
+}
+static inline void netpoll_netdev_init(struct net_device *dev)
+{
+}
 #endif
 
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 6cc8a70..d54924e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -220,7 +220,8 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
  *	Device drivers call our routines to queue packets here. We empty the
  *	queue in the local softnet handler.
  */
-DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
+
+DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
 #ifdef CONFIG_SYSFS
 extern int netdev_sysfs_init(void);
@@ -1018,16 +1019,12 @@ int dev_close(struct net_device *dev)
 	clear_bit(__LINK_STATE_START, &dev->state);
 
 	/* Synchronize to scheduled poll. We cannot touch poll list,
-	 * it can be even on different cpu. So just clear netif_running(),
-	 * and wait when poll really will happen. Actually, the best place
-	 * for this is inside dev->stop() after device stopped its irq
-	 * engine, but this requires more changes in devices. */
-
+	 * it can be even on different cpu. So just clear netif_running().
+	 *
+	 * dev->stop() will invoke napi_disable() on all of its
+	 * napi_struct instances on this device.
+	 */
 	smp_mb__after_clear_bit(); /* Commit netif_running(). */
-	while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
-		/* No hurry. */
-		msleep(1);
-	}
 
 	/*
 	 *	Call the device specific close. This cannot fail.
@@ -1233,21 +1230,21 @@ void __netif_schedule(struct net_device *dev)
 }
 EXPORT_SYMBOL(__netif_schedule);
 
-void __netif_rx_schedule(struct net_device *dev)
+void dev_kfree_skb_irq(struct sk_buff *skb)
 {
-	unsigned long flags;
+	if (atomic_dec_and_test(&skb->users)) {
+		struct softnet_data *sd;
+		unsigned long flags;
 
-	local_irq_save(flags);
-	dev_hold(dev);
-	list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
-	if (dev->quota < 0)
-		dev->quota += dev->weight;
-	else
-		dev->quota = dev->weight;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	local_irq_restore(flags);
+		local_irq_save(flags);
+		sd = &__get_cpu_var(softnet_data);
+		skb->next = sd->completion_queue;
+		sd->completion_queue = skb;
+		raise_softirq_irqoff(NET_TX_SOFTIRQ);
+		local_irq_restore(flags);
+	}
 }
-EXPORT_SYMBOL(__netif_rx_schedule);
+EXPORT_SYMBOL(dev_kfree_skb_irq);
 
 void dev_kfree_skb_any(struct sk_buff *skb)
 {
@@ -1730,7 +1727,7 @@ enqueue:
 			return NET_RX_SUCCESS;
 		}
 
-		netif_rx_schedule(&queue->backlog_dev);
+		napi_schedule(&queue->backlog);
 		goto enqueue;
 	}
 
@@ -1771,6 +1768,7 @@ static inline struct net_device *skb_bond(struct sk_buff *skb)
 	return dev;
 }
 
+
 static void net_tx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1927,7 +1925,7 @@ int netif_receive_skb(struct sk_buff *skb)
 	__be16 type;
 
 	/* if we've gotten here through NAPI, check netpoll */
-	if (skb->dev->poll && netpoll_rx(skb))
+	if (netpoll_receive_skb(skb))
 		return NET_RX_DROP;
 
 	if (!skb->tstamp.tv64)
@@ -2017,90 +2015,101 @@ out:
 	return ret;
 }
 
-static int process_backlog(struct net_device *backlog_dev, int *budget)
+static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	int quota = min(backlog_dev->quota, *budget);
 	struct softnet_data *queue = &__get_cpu_var(softnet_data);
 	unsigned long start_time = jiffies;
 
-	backlog_dev->weight = weight_p;
-	for (;;) {
+	napi->weight = weight_p;
+	do {
 		struct sk_buff *skb;
 		struct net_device *dev;
 
 		local_irq_disable();
 		skb = __skb_dequeue(&queue->input_pkt_queue);
-		if (!skb)
-			goto job_done;
 		local_irq_enable();
-
+		if (!skb) {
+			napi_complete(napi);
+			break;
+		}
+	
 		dev = skb->dev;
 
 		netif_receive_skb(skb);
 
 		dev_put(dev);
+	} while (++work < quota && jiffies == start_time);
 
-		work++;
-
-		if (work >= quota || jiffies - start_time > 1)
-			break;
-
-	}
-
-	backlog_dev->quota -= work;
-	*budget -= work;
-	return -1;
+	return work;
+}
 
-job_done:
-	backlog_dev->quota -= work;
-	*budget -= work;
+/**
+ * __napi_schedule - schedule for receive
+ * @napi: entry to schedule
+ *
+ * The entry's receive function will be scheduled to run
+ */
+void fastcall __napi_schedule(struct napi_struct *n)
+{
+	unsigned long flags;
 
-	list_del(&backlog_dev->poll_list);
-	smp_mb__before_clear_bit();
-	netif_poll_enable(backlog_dev);
+	if (n->quota < 0)
+		n->quota += n->weight;
+	else
+		n->quota = n->weight;
 
-	local_irq_enable();
-	return 0;
+	local_irq_save(flags);
+	list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
+	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__napi_schedule);
+
 
 static void net_rx_action(struct softirq_action *h)
 {
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct list_head list;
 	unsigned long start_time = jiffies;
 	int budget = netdev_budget;
 	void *have;
 
 	local_irq_disable();
+	list_replace_init(&__get_cpu_var(softnet_data).poll_list, &list);
+	local_irq_enable();
 
-	while (!list_empty(&queue->poll_list)) {
-		struct net_device *dev;
+	while (!list_empty(&list)) {
+		struct napi_struct *n;
+
+		/* if softirq window is exhausted then punt */
+		if (unlikely(budget <= 0 || jiffies != start_time)) {
+			local_irq_disable();
+			list_splice(&list, &__get_cpu_var(softnet_data).poll_list);
+			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+			local_irq_enable();
+			break;
+		}
 
-		if (budget <= 0 || jiffies - start_time > 1)
-			goto softnet_break;
+		n = list_entry(list.next, struct napi_struct, poll_list);
 
-		local_irq_enable();
+		have = netpoll_poll_lock(n);
 
-		dev = list_entry(queue->poll_list.next,
-				 struct net_device, poll_list);
-		have = netpoll_poll_lock(dev);
+		list_del(&n->poll_list);
 
-		if (dev->quota <= 0 || dev->poll(dev, &budget)) {
-			netpoll_poll_unlock(have);
-			local_irq_disable();
-			list_move_tail(&dev->poll_list, &queue->poll_list);
-			if (dev->quota < 0)
-				dev->quota += dev->weight;
-			else
-				dev->quota = dev->weight;
-		} else {
-			netpoll_poll_unlock(have);
-			dev_put(dev);
-			local_irq_disable();
+		/* if quota not exhausted process work */
+		if (likely(n->quota > 0)) {
+			int work = n->poll(n, min(budget, n->quota));
+
+			budget -= work;
+			n->quota -= work;
 		}
+
+		/* if napi_complete not called, reschedule */
+		if (test_bit(NAPI_STATE_SCHED, &n->state))
+			__napi_schedule(n);
+
+		netpoll_poll_unlock(have);
 	}
-out:
-	local_irq_enable();
 #ifdef CONFIG_NET_DMA
 	/*
 	 * There may not be any more sk_buffs coming right now, so push
@@ -2115,12 +2124,6 @@ out:
 		}
 	}
 #endif
-	return;
-
-softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-	goto out;
 }
 
 static gifconf_func_t * gifconf_list [NPROTO];
@@ -3704,6 +3707,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->egress_subqueue_count = queue_count;
 
 	dev->get_stats = internal_stats;
+	netpoll_netdev_init(dev);
 	setup(dev);
 	strcpy(dev->name, name);
 	return dev;
@@ -4037,10 +4041,9 @@ static int __init net_dev_init(void)
 		skb_queue_head_init(&queue->input_pkt_queue);
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
-		set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
-		queue->backlog_dev.weight = weight_p;
-		queue->backlog_dev.poll = process_backlog;
-		atomic_set(&queue->backlog_dev.refcnt, 1);
+
+		queue->backlog.poll = process_backlog;
+		queue->backlog.weight = weight_p;
 	}
 
 	netdev_dma_register();
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 5c19b06..79159db 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -216,20 +216,6 @@ static ssize_t store_tx_queue_len(struct device *dev,
 	return netdev_store(dev, attr, buf, len, change_tx_queue_len);
 }
 
-NETDEVICE_SHOW(weight, fmt_dec);
-
-static int change_weight(struct net_device *net, unsigned long new_weight)
-{
-	net->weight = new_weight;
-	return 0;
-}
-
-static ssize_t store_weight(struct device *dev, struct device_attribute *attr,
-			    const char *buf, size_t len)
-{
-	return netdev_store(dev, attr, buf, len, change_weight);
-}
-
 static struct device_attribute net_class_attributes[] = {
 	__ATTR(addr_len, S_IRUGO, show_addr_len, NULL),
 	__ATTR(iflink, S_IRUGO, show_iflink, NULL),
@@ -246,7 +232,6 @@ static struct device_attribute net_class_attributes[] = {
 	__ATTR(flags, S_IRUGO | S_IWUSR, show_flags, store_flags),
 	__ATTR(tx_queue_len, S_IRUGO | S_IWUSR, show_tx_queue_len,
 	       store_tx_queue_len),
-	__ATTR(weight, S_IRUGO | S_IWUSR, show_weight, store_weight),
 	{}
 };
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index de1b26a..abe6e3a 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -119,19 +119,22 @@ static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
 static void poll_napi(struct netpoll *np)
 {
 	struct netpoll_info *npinfo = np->dev->npinfo;
+	struct napi_struct *napi;
 	int budget = 16;
 
-	if (test_bit(__LINK_STATE_RX_SCHED, &np->dev->state) &&
-	    npinfo->poll_owner != smp_processor_id() &&
-	    spin_trylock(&npinfo->poll_lock)) {
-		npinfo->rx_flags |= NETPOLL_RX_DROP;
-		atomic_inc(&trapped);
+	list_for_each_entry(napi, &np->dev->napi_list, dev_list) {
+		if (test_bit(NAPI_STATE_SCHED, &napi->state) &&
+		    napi->poll_owner != smp_processor_id() &&
+		    spin_trylock(&napi->poll_lock)) {
+			npinfo->rx_flags |= NETPOLL_RX_DROP;
+			atomic_inc(&trapped);
 
-		np->dev->poll(np->dev, &budget);
+			napi->poll(napi, budget);
 
-		atomic_dec(&trapped);
-		npinfo->rx_flags &= ~NETPOLL_RX_DROP;
-		spin_unlock(&npinfo->poll_lock);
+			atomic_dec(&trapped);
+			npinfo->rx_flags &= ~NETPOLL_RX_DROP;
+			spin_unlock(&napi->poll_lock);
+		}
 	}
 }
 
@@ -157,7 +160,7 @@ void netpoll_poll(struct netpoll *np)
 
 	/* Process pending work on NIC */
 	np->dev->poll_controller(np->dev);
-	if (np->dev->poll)
+	if (!list_empty(&np->dev->napi_list))
 		poll_napi(np);
 
 	service_arp_queue(np->dev->npinfo);
@@ -233,6 +236,17 @@ repeat:
 	return skb;
 }
 
+static int netpoll_owner_active(struct net_device *dev)
+{
+	struct napi_struct *napi;
+
+	list_for_each_entry(napi, &dev->napi_list, dev_list) {
+		if (napi->poll_owner == smp_processor_id())
+			return 1;
+	}
+	return 0;
+}
+
 static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
@@ -246,8 +260,7 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 	}
 
 	/* don't get messages out of order, and no recursion */
-	if (skb_queue_len(&npinfo->txq) == 0 &&
-		    npinfo->poll_owner != smp_processor_id()) {
+	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
 		unsigned long flags;
 
 		local_irq_save(flags);
@@ -652,8 +665,6 @@ int netpoll_setup(struct netpoll *np)
 
 		npinfo->rx_flags = 0;
 		npinfo->rx_np = NULL;
-		spin_lock_init(&npinfo->poll_lock);
-		npinfo->poll_owner = -1;
 
 		spin_lock_init(&npinfo->rx_lock);
 		skb_queue_head_init(&npinfo->arp_tx);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4756d58..2b0b6fa 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -634,7 +634,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 
 	NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name);
 	NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len);
-	NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight);
 	NLA_PUT_U8(skb, IFLA_OPERSTATE,
 		   netif_running(dev) ? dev->operstate : IF_OPER_DOWN);
 	NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode);
@@ -834,9 +833,6 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	if (tb[IFLA_TXQLEN])
 		dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
 
-	if (tb[IFLA_WEIGHT])
-		dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
-
 	if (tb[IFLA_OPERSTATE])
 		set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 
@@ -1074,8 +1070,6 @@ replay:
 			       nla_len(tb[IFLA_BROADCAST]));
 		if (tb[IFLA_TXQLEN])
 			dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]);
-		if (tb[IFLA_WEIGHT])
-			dev->weight = nla_get_u32(tb[IFLA_WEIGHT]);
 		if (tb[IFLA_OPERSTATE])
 			set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
 		if (tb[IFLA_LINKMODE])

^ permalink raw reply related

* Re: [PATCH RFC]: napi_struct V5
From: David Miller @ 2007-08-08  4:08 UTC (permalink / raw)
  To: rdreier; +Cc: netdev, shemminger, jgarzik, hadi, rusty
In-Reply-To: <adahcnar85j.fsf@cisco.com>

From: Roland Dreier <rdreier@cisco.com>
Date: Tue, 07 Aug 2007 20:56:40 -0700

> First, there's pretty much universal agreement that
> patches should only contain one idea ("separate your changes"), that
> cleanups should not be mixed in with other changes, etc, etc.

Fine I'll revert.

> Although frankly, I have to say that your position here doesn't make
> much sense.

I think it does, the inconsistencies shown in each driver were
either 1) unnecessarily being different or 2) causing outright
races and bugs (see the discussions between Rusty and myself
during the first few revisions).

> In your earlier patches that got rid of netif_rx_reschedule(),
> your suggestion on how to handle the missed event race was to ask the
> hardware to trigger another event from the poll routine so it got
> rescheduled.

And I have rescinded this idea, and the removal of the resched
interface, for hardware that is not capable of this, in response to
your feedback.

> Being treated with the same level of collegiality that (I think) I
> treat you with would be appreciated.

My apologies.

I tend to lose it when people get hyper-critical of a change for which
I am doing all of the work, editing a large number of drivers, and from
which everyone benefits.

^ permalink raw reply

* Re: [PATCH RFC]: napi_struct V5
From: Roland Dreier @ 2007-08-08  3:56 UTC (permalink / raw)
  To: David Miller; +Cc: netdev, shemminger, jgarzik, hadi, rusty
In-Reply-To: <20070807.160651.98554604.davem@davemloft.net>

 > >  >  		n = ib_poll_cq(priv->cq, t, priv->ibwc);
 > >  >  
 > >  > -		for (i = 0; i < n; ++i) {
 > >  > +		for (i = 0; i < n; i++) {
 > > 
 > > it might be nicer to avoid noise like this in the patch.

 > That one was just too much of an eye sore to ignore and it
 > affected my ability to audit the change I was making.
 > 
 > I mean, this is one of the first precise examples of kinds of
 > programming that lead to subtle bugs mentioned in The Practice of
 > Programming.
 > 
 > So this is staying in the patch, sorry.

This is a pretty minor point but this attitude is a little too much
for me to take.  First, there's pretty much universal agreement that
patches should only contain one idea ("separate your changes"), that
cleanups should not be mixed in with other changes, etc, etc.

Second, you know as well as I do *why* patches aren't supposed to do
this.  Adding more lines of changes into your patch is exactly what
makes it *harder* for everyone to audit.  Every single person who
reviews the ipoib part of the patch has to look at that change and
waste a few seconds realizing, "oh, I see, the only difference here is
a cleanup unrelated to the NAPI conversion."  As for "subtle bugs" --
we both know that even the most obviously safe changes always have a
chance at introducing a bug, and it's always safer to leave something
alone.

So if you want to tinker with the code here, fine, it's just a
harmless annoyance -- I can spend the tiny amount of time to check
that the change is OK.  But don't try to tell me it's good programming
practice.  I know that you know better: in commit a2fb23af, you had
the sense to leave the line

	for (i = 0; i < PCI_NUM_RESOURCES - PCI_BRIDGE_RESOURCES; ++i) {

alone when you copied it from arch/powerpc, rather than tinkering with
known-working code.

Sorry.  That's probably way too much time wasted on something so trivial.

 > > this goto back to the polling loop is a change in behavior.  When we
 > > were tuning NAPI, we found that returning in the missed event case and
 > > letting the NAPI core call the poll routine later actually performed
 > > better, because it allowed more work to pile up.

 > You weren't using your quantum, which is what you're supposed to do.
 > 
 > Sometimes using your quantum correctly won't perform optimally, but in
 > the interest of fairness and what NAPI wants, that is what you're
 > supposed to do, process work until you hit budget or there is no
 > more work.
 > 
 > Look, I'm not going to back down to every single tweak in every
 > driver.  All the drivers should handle this case consistently, and if
 > I have to edit every single driver to make this patch that is exactly
 > what I am going to do and enforce.

OK, although I think it would be better to put changes in driver
behavior into independent patches from the main NAPI change, if only
for the sake of bisectability (otherwise everything is just going to
bisect back to your mega-patch and that kind of sucks for debugging;
cf Linus's reaction to the x86-64 timer conversion patches).

I don't have a lot invested into the details of the NAPI polling here,
but I'll ask the IBM people who saw a big difference in performance
between jumping back directly or waiting for the poll to be
rescheduled to retest and report their results.

Although frankly, I have to say that your position here doesn't make
much sense.  In your earlier patches that got rid of netif_rx_reschedule(),
your suggestion on how to handle the missed event race was to ask the
hardware to trigger another event from the poll routine so it got
rescheduled.  And if the poll routine knows there's more work pending,
I don't see much difference in requesting a synthetic event from the
hardware and then exiting the poll routine versus raising the poll
softirq directly and then exiting the poll routine.

 > If you patch the ipoib driver behavior back afterwards, I will NAK
 > that patch every single time unless you make EVERY SINGLE OTHER DRIVER
 > do the same and thus retain the consistency.

At a meta level, I think it would be better for everyone's blood
pressure if you tried to keep the temperature down during technical
discussions like this.  Look back at what I wrote: "this goto ... is a
change in behavior," and then I explained the current behavior.  I
didn't threaten to NAK this NAPI patch, or even ask you to change the
patch.  I just gave you the information so that you could explain your
reasoning in case the change was intended, or so you could keep the
current behavior if the change was inadvertent.  Being treated with
the same level of collegiality that (I think) I treat you with would
be appreciated.

 - R.

^ permalink raw reply

* Re: [PATCH 6/14] nes: hardware init
From: Jeff Garzik @ 2007-08-08  1:58 UTC (permalink / raw)
  To: ggrundstrom; +Cc: rdreier, ewg, netdev
In-Reply-To: <200708080105.l7815pC3004792@neteffect.com>

ggrundstrom@neteffect.com wrote:
> +struct nes_adapter *nes_init_adapter(struct nes_device *nesdev, u8 hw_rev) {
> +	struct nes_adapter *nesadapter = NULL;
> +	unsigned long num_pds;
> +	u32 u32temp;
> +	u32 port_count;
> +	u16 max_rq_wrs;
> +	u16 max_sq_wrs;
> +	u32 max_mr;
> +	u32 max_256pbl;
> +	u32 max_4kpbl;
> +	u32 max_qp;
> +	u32 max_irrq;
> +	u32 max_cq;
> +	u32 hte_index_mask;
> +	u32 adapter_size;
> +	u32 arp_table_size;
> +	u8  OneG_Mode;
> +
> +	/* search the list of existing adapters */
> +	list_for_each_entry(nesadapter, &nes_adapter_list, list) {
> +		dprintk("Searching Adapter list for PCI devfn = 0x%X,"
> +				" adapter PCI slot/bus = %u/%u, pci devices PCI slot/bus = %u/%u, .\n",
> +				nesdev->pcidev->devfn,
> +				PCI_SLOT(nesadapter->devfn),
> +				nesadapter->bus_number,
> +				PCI_SLOT(nesdev->pcidev->devfn),
> +				nesdev->pcidev->bus->number );
> +		if ((PCI_SLOT(nesadapter->devfn) == PCI_SLOT(nesdev->pcidev->devfn)) &&
> +			(nesadapter->bus_number == nesdev->pcidev->bus->number)) {
> +			nesadapter->ref_count++;
> +			return(nesadapter);

you don't need any of this PCI bus scanning at all.  Please convert to 
normal PCI usage



> +	/* no adapter found */
> +	num_pds = pci_resource_len(nesdev->pcidev, BAR_1) / 4096;

see, this is why the BAR_1 define should go away -- it's actually defined
to the value '2'


> +	if (hw_rev != NE020_REV) {
> +		dprintk("%s: NE020 driver detected unknown hardware revision 0x%x\n",
> +				__FUNCTION__, hw_rev);
> +		return(NULL);
> +	}

move this test to the top of the function


> +	dprintk("%s:%u Determine Soft Reset, QP_control=0x%x, CPU0=0x%x, CPU1=0x%x, CPU2=0x%x\n",
> +			__FUNCTION__, __LINE__,
> +			nes_read_indexed(nesdev, NES_IDX_QP_CONTROL + PCI_FUNC(nesdev->pcidev->devfn) * 8),
> +			nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS),
> +			nes_read_indexed(nesdev, 0x00A4),
> +			nes_read_indexed(nesdev, 0x00A8));
> +
> +		dprintk("%s: Reset and init NE020\n", __FUNCTION__);
> +		if ((port_count = nes_reset_adapter_ne020(nesdev, &OneG_Mode)) == 0) {
> +			return(NULL);
> +		}
> +		if (nes_init_serdes(nesdev, port_count)) {
> +			return(NULL);
> +		}

kill braces


> +		nes_init_csr_ne020(nesdev, hw_rev, port_count);
> +
> +	/* Setup and enable the periodic timer */
> +	nesdev->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
> +	if (nesdev->et_rx_coalesce_usecs_irq) {
> +		nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x80000000 |
> +				((u32)(nesdev->et_rx_coalesce_usecs_irq * 8)));
> +	} else {
> +		nes_write32(nesdev->regs+NES_PERIODIC_CONTROL, 0x00000000);
> +	}
> +
> +	max_qp = nes_read_indexed(nesdev, NES_IDX_QP_CTX_SIZE);
> +	dprintk("%s: QP_CTX_SIZE=%u\n", __FUNCTION__, max_qp);
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_QUAD_HASH_TABLE_SIZE);
> +	if (max_qp > ((u32)1 << (u32temp & 0x001f))) {
> +		dprintk("Reducing Max QPs to %u due to hash table size = 0x%08X\n",
> +				max_qp, u32temp);
> +		max_qp = (u32)1 << (u32temp & 0x001f);
> +	}
> +
> +	hte_index_mask = ((u32)1 << ((u32temp & 0x001f)+1))-1;
> +	dprintk("Max QP = %u, hte_index_mask = 0x%08X.\n", max_qp, hte_index_mask);
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_IRRQ_COUNT);
> +
> +	max_irrq = 1 << (u32temp & 0x001f);
> +
> +	if (max_qp > max_irrq) {
> +		max_qp = max_irrq;
> +		dprintk("Reducing Max QPs to %u due to Available Q1s.\n", max_qp);
> +	}
> +
> +	/* there should be no reason to allocate more pds than qps */
> +	if (num_pds > max_qp)
> +		num_pds = max_qp;
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_MRT_SIZE);
> +	max_mr = (u32)8192 << (u32temp & 0x7);
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_PBL_REGION_SIZE);
> +	max_256pbl = (u32)1 << (u32temp & 0x0000001f);
> +	max_4kpbl = (u32)1 << ((u32temp >> 16) & 0x0000001f);
> +	max_cq = nes_read_indexed(nesdev, NES_IDX_CQ_CTX_SIZE);
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_ARP_CACHE_SIZE);
> +	arp_table_size = 1 << u32temp;
> +
> +	adapter_size = (sizeof(struct nes_adapter) +
> +			(sizeof(unsigned long)-1)) & (~(sizeof(unsigned long)-1));
> +	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_qp);
> +	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_mr);
> +	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(max_cq);
> +	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(num_pds);
> +	adapter_size += sizeof(unsigned long) * BITS_TO_LONGS(arp_table_size);
> +	adapter_size += sizeof(struct nes_qp **) * max_qp;
> +
> +	/* allocate a new adapter struct */
> +	nesadapter = kmalloc(adapter_size, GFP_KERNEL);
> +	if (nesadapter == NULL) {
> +		return(NULL);
> +	}
> +	memset(nesadapter, 0, adapter_size);

kzalloc


> +	dprintk("Allocating new nesadapter @ %p, size = %u (actual size = %u).\n",
> +			nesadapter, (u32)sizeof(struct nes_adapter), adapter_size);
> +
> +	/* populate the new nesadapter */
> +	nesadapter->devfn = nesdev->pcidev->devfn;
> +	nesadapter->bus_number = nesdev->pcidev->bus->number;

wrong wrong wrong.  Use pci_dev pointer for comparison.  Store that, not 
bus number and devfn.


> +	nesadapter->ref_count = 1;
> +	nesadapter->timer_int_req = 0xffff0000;
> +	nesadapter->OneG_Mode = OneG_Mode;
> +
> +	/* nesadapter->tick_delta = clk_divisor; */
> +	nesadapter->hw_rev = hw_rev;
> +	nesadapter->port_count = port_count;
> +
> +	nesadapter->max_qp = max_qp;
> +	nesadapter->hte_index_mask = hte_index_mask;
> +	nesadapter->max_irrq = max_irrq;
> +	nesadapter->max_mr = max_mr;
> +	nesadapter->max_256pbl = max_256pbl - 1;
> +	nesadapter->max_4kpbl = max_4kpbl - 1;
> +	nesadapter->max_cq = max_cq;
> +	nesadapter->free_256pbl = max_256pbl - 1;
> +	nesadapter->free_4kpbl = max_4kpbl - 1;
> +	nesadapter->max_pd = num_pds;
> +	nesadapter->arp_table_size = arp_table_size;
> +	nesadapter->base_pd = 1;
> +
> +	nesadapter->device_cap_flags =
> +			IB_DEVICE_ZERO_STAG | IB_DEVICE_SEND_W_INV | IB_DEVICE_MEM_WINDOW;
> +
> +	nesadapter->allocated_qps = (unsigned long *)&(((unsigned char *)nesadapter)
> +			[(sizeof(struct nes_adapter)+(sizeof(unsigned long)-1))&(~(sizeof(unsigned long)-1))]);
> +	nesadapter->allocated_cqs = &nesadapter->allocated_qps[BITS_TO_LONGS(max_qp)];
> +	nesadapter->allocated_mrs = &nesadapter->allocated_cqs[BITS_TO_LONGS(max_cq)];
> +	nesadapter->allocated_pds = &nesadapter->allocated_mrs[BITS_TO_LONGS(max_mr)];
> +	nesadapter->allocated_arps = &nesadapter->allocated_pds[BITS_TO_LONGS(num_pds)];
> +	nesadapter->qp_table = (struct nes_qp **)(&nesadapter->allocated_arps[BITS_TO_LONGS(arp_table_size)]);
> +
> +
> +	/* mark the usual suspect QPs and CQs as in use */
> +	for (u32temp = 0; u32temp < NES_FIRST_QPN; u32temp++) {
> +		set_bit(u32temp, nesadapter->allocated_qps);
> +		set_bit(u32temp, nesadapter->allocated_cqs);
> +	}
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_QP_MAX_CFG_SIZES);
> +
> +	max_rq_wrs = ((u32temp >> 8) & 3);
> +	switch (max_rq_wrs) {
> +		case 0:
> +			max_rq_wrs = 4;
> +			break;
> +		case 1:
> +			max_rq_wrs = 16;
> +			break;
> +		case 2:
> +			max_rq_wrs = 32;
> +			break;
> +		case 3:
> +			max_rq_wrs = 512;
> +			break;
> +	}
> +
> +	max_sq_wrs = (u32temp & 3);
> +	switch (max_sq_wrs) {
> +		case 0:
> +			max_sq_wrs = 4;
> +			break;
> +		case 1:
> +			max_sq_wrs = 16;
> +			break;
> +		case 2:
> +			max_sq_wrs = 32;
> +			break;
> +		case 3:
> +			max_sq_wrs = 512;
> +			break;
> +	}
> +	nesadapter->max_qp_wr = min(max_rq_wrs, max_sq_wrs);
> +	dprintk("Max wqes = %u.\n", nesadapter->max_qp_wr);
> +
> +	nesadapter->max_irrq_wr = (u32temp >> 16) & 3;
> +	dprintk("%s: Max IRRQ wqes = %u.\n", __FUNCTION__, nesadapter->max_irrq_wr);
> +
> +
> +	nesadapter->max_sge = 4;
> +	nesadapter->max_cqe = 32767;
> +
> +	if (nes_read_eeprom_values(nesdev, nesadapter)) {
> +		printk(KERN_ERR PFX "Unable to read EEPROM data.\n");
> +		kfree(nesadapter);
> +		return(NULL);
> +	}
> +
> +	u32temp = nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG);
> +	nes_write_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG,
> +			(u32temp & 0xff000000) | (nesadapter->tcp_timer_core_clk_divisor & 0x00ffffff));
> +	dprintk("%s: TCP Timer Config0=%08x\n", __FUNCTION__,
> +			nes_read_indexed(nesdev, NES_IDX_TCP_TIMER_CONFIG));
> +
> +	/* setup port configuration */
> +	if (nesadapter->port_count == 1) {
> +		u32temp = 0x00000000;
> +		if (nes_drv_opt & NES_DRV_OPT_DUAL_LOGICAL_PORT) {
> +			nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000002);
> +		} else {
> +			nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
> +		}
> +	} else {
> +		if (nesadapter->port_count == 2) {
> +			u32temp = 0x00000044;
> +		} else {
> +			u32temp = 0x000000e4;
> +		}
> +		nes_write_indexed(nesdev, NES_IDX_TX_POOL_SIZE, 0x00000003);
> +	}
> +
> +	nes_write_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT, u32temp);
> +	dprintk("%s: Probe time, LOG2PHY=%u\n", __FUNCTION__,
> +			nes_read_indexed(nesdev, NES_IDX_NIC_LOGPORT_TO_PHYPORT));
> +
> +	spin_lock_init(&nesadapter->resource_lock);
> +	spin_lock_init(&nesadapter->phy_lock);
> +
> +	init_timer(&nesadapter->mh_timer);
> +	nesadapter->mh_timer.function = nes_mh_fix;
> +	nesadapter->mh_timer.expires = jiffies + (HZ/5);  /* 1 second */
> +	nesadapter->mh_timer.data = (unsigned long)nesdev;
> +	add_timer(&nesadapter->mh_timer);
> +
> +	INIT_LIST_HEAD(&nesadapter->nesvnic_list[0]);
> +	INIT_LIST_HEAD(&nesadapter->nesvnic_list[1]);
> +	INIT_LIST_HEAD(&nesadapter->nesvnic_list[2]);
> +	INIT_LIST_HEAD(&nesadapter->nesvnic_list[3]);
> +
> +	list_add_tail(&nesadapter->list, &nes_adapter_list);
> +
> +	return(nesadapter);
> +}
> +
> +
> +/**
> + * nes_reset_adapter_ne020
> + */
> +unsigned int nes_reset_adapter_ne020(struct nes_device *nesdev, u8 *OneG_Mode)
> +{
> +	u32 port_count;
> +	u32 u32temp;
> +	u32 i;
> +
> +	u32temp = nes_read32(nesdev->regs+NES_SOFTWARE_RESET);
> +	port_count = ((u32temp & 0x00000300) >> 8) + 1;
> +	/* TODO: assuming that both SERDES are set the same for now */
> +	*OneG_Mode = (u32temp & 0x00003c00) ? 0 : 1;
> +	dprintk("%s: Initial Software Reset = 0x%08X, port_count=%u\n", __FUNCTION__, u32temp, port_count);
> +	if (*OneG_Mode) {
> +		dprintk("%s: Running in 1G mode.\n", __FUNCTION__);
> +	}
> +	u32temp &= 0xff00ffc0;
> +	switch (port_count) {
> +		case 1:
> +			u32temp |= 0x00ee0000;
> +			break;
> +		case 2:
> +			u32temp |= 0x00cc0000;
> +			break;
> +		case 4:
> +			u32temp |= 0x00000000;
> +			break;
> +		default:
> +			return (0);
> +			break;
> +	}
> +
> +	/* check and do full reset if needed */
> +	if (nes_read_indexed(nesdev, NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8))) {
> +		dprintk("Issuing Full Soft reset = 0x%08X\n", u32temp | 0xd);
> +		nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
> +
> +		i = 0;
> +		while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) {
> +			mdelay(1);
> +		}
> +		if (i >= 10000) {
> +			dprintk("Did not see full soft reset done.\n");
> +			return (0);
> +		}
> +	}
> +
> +	/* port reset */
> +	switch (port_count) {
> +		case 1:
> +			u32temp |= 0x00ee0010;
> +			break;
> +		case 2:
> +			u32temp |= 0x00cc0030;
> +			break;
> +		case 4:
> +			u32temp |= 0x00000030;
> +			break;
> +	}
> +
> +	dprintk("Issuing Port Soft reset = 0x%08X\n", u32temp | 0xd);
> +	nes_write32(nesdev->regs+NES_SOFTWARE_RESET, u32temp | 0xd);
> +
> +	i = 0;
> +	while (((nes_read32(nesdev->regs+NES_SOFTWARE_RESET) & 0x00000040) == 0) && i++ < 10000) {
> +		mdelay(1);
> +	}
> +	if (i >= 10000) {
> +		dprintk("Did not see port soft reset done.\n");
> +		return (0);
> +	}
> +
> +	/* serdes 0 */
> +	i = 0;
> +	while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
> +			& 0x0000000f)) != 0x0000000f) && i++ < 5000) {
> +		mdelay(1);

all these delays should be msleep()


> +	if (i >= 5000) {
> +		dprintk("Serdes 0 not ready, status=%x\n", u32temp);
> +		return (0);
> +	}
> +
> +	/* serdes 1 */
> +	if (port_count > 1) {
> +		i = 0;
> +		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1)
> +				& 0x0000000f)) != 0x0000000f) && i++ < 5000) {
> +			mdelay(1);
> +		}
> +		if (i >= 5000) {
> +			dprintk("Serdes 1 not ready, status=%x\n", u32temp);
> +			return (0);
> +		}
> +	}
> +
> +	i = 0;
> +	while ((nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS) != 0x80) && i++ < 10000) {
> +		mdelay(1);
> +	}
> +	dprintk("%s:%u CPU_STATUS loops=%u\n", __FUNCTION__, __LINE__, i);
> +	if (i >= 10000) {
> +		printk(KERN_ERR PFX "Internal CPU not ready, status = %02X\n",
> +				nes_read_indexed(nesdev, NES_IDX_INT_CPU_STATUS));
> +		return (0);
> +	}
> +
> +	return (port_count);
> +}
> +
> +
> +/**
> + * nes_init_serdes
> + */
> +int nes_init_serdes(struct nes_device *nesdev, u8 port_count)
> +{
> +	int i;
> +	u32 u32temp;
> +
> +	/* init serdes 0 */
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL0, 0x00000008);
> +	i = 0;
> +	while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS0)
> +			& 0x0000000f)) != 0x0000000f) && i++ < 5000) {
> +		mdelay(1);
> +	}
> +	if (i >= 5000) {
> +		dprintk("Init: serdes 0 not ready, status=%x\n", u32temp);
> +		return (1);
> +	}
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP0, 0x000bdef7);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE0, 0x9ce73000);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE0, 0x0ff00000);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET0, 0x00000000);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS0, 0x00000000);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL0, 0x00000000);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL0, 0xf0002222);
> +	nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL0, 0x000000ff);
> +
> +	if (port_count > 1) {
> +		/* init serdes 1 */
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_CONTROL1, 0x00000048);
> +		i = 0;
> +		while (((u32temp = (nes_read_indexed(nesdev, NES_IDX_ETH_SERDES_COMMON_STATUS1) & 0x0000000f)) != 0x0000000f) &&
> +			   (i++ < 5000)) {
> +			mdelay(1);
> +		}
> +		if (i >= 5000) {
> +			printk("%s: Init: serdes 1 not ready, status=%x\n", __FUNCTION__, u32temp);
> +			/* return 1; */
> +		}
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_EMP1, 0x000bdef7);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_TX_DRIVE1, 0x9ce73000);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_MODE1, 0x0ff00000);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_SIGDET1, 0x00000000);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_BYPASS1, 0x00000000);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_LOOPBACK_CONTROL1, 0x00000000);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_RX_EQ_CONTROL1, 0xf0002222);
> +		nes_write_indexed(nesdev, NES_IDX_ETH_SERDES_CDR_CONTROL1, 0x000000ff);
> +	}
> +	return (0);
> +}
> +
> +
> +/**
> + * nes_init_csr_ne020
> + * Initialize registers for ne020 hardware
> + */
> +void nes_init_csr_ne020(struct nes_device *nesdev, u8 hw_rev, u8 port_count)
> +{
> +
> +	nes_write_indexed(nesdev, 0x000001E4, 0x00000007);	
> +	/* nes_write_indexed(nesdev, 0x000001E8, 0x000208C4); */	
> +	nes_write_indexed(nesdev, 0x000001E8, 0x00020844);	
> +	nes_write_indexed(nesdev, 0x000001D8, 0x00048002);	
> +	/* nes_write_indexed(nesdev, 0x000001D8, 0x0004B002); */  
> +	nes_write_indexed(nesdev, 0x000001FC, 0x00050005);	
> +	nes_write_indexed(nesdev, 0x00000600, 0x55555555);	
> +	nes_write_indexed(nesdev, 0x00000604, 0x55555555);	
> +
> +	/* TODO: move these MAC register settings to NIC bringup */
> +	nes_write_indexed(nesdev, 0x00002000, 0x00000001);	
> +	nes_write_indexed(nesdev, 0x00002004, 0x00000001);	
> +	nes_write_indexed(nesdev, 0x00002008, 0x0000FFFF);	
> +	nes_write_indexed(nesdev, 0x0000200C, 0x00000001);	
> +	nes_write_indexed(nesdev, 0x00002010, 0x000003c1);	
> +	nes_write_indexed(nesdev, 0x0000201C, 0x75345678);	
> +	if (port_count > 1) {
> +		nes_write_indexed(nesdev, 0x00002200, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002204, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002208, 0x0000FFFF);	
> +		nes_write_indexed(nesdev, 0x0000220C, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002210, 0x000003c1);	
> +		nes_write_indexed(nesdev, 0x0000221C, 0x75345678);	
> +	}
> +	if (port_count > 2) {
> +		nes_write_indexed(nesdev, 0x00002400, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002404, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002408, 0x0000FFFF);	
> +		nes_write_indexed(nesdev, 0x0000240C, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002410, 0x000003c1);	
> +		nes_write_indexed(nesdev, 0x0000241C, 0x75345678);	
> +
> +		nes_write_indexed(nesdev, 0x00002600, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002604, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002608, 0x0000FFFF);	
> +		nes_write_indexed(nesdev, 0x0000260C, 0x00000001);	
> +		nes_write_indexed(nesdev, 0x00002610, 0x000003c1);	
> +		nes_write_indexed(nesdev, 0x0000261C, 0x75345678);	
> +	}
> +
> +	nes_write_indexed(nesdev, 0x00005000, 0x00018000);	
> +	/* nes_write_indexed(nesdev, 0x00005000, 0x00010000); */  
> +	nes_write_indexed(nesdev, 0x00005004, 0x00020001);	
> +	nes_write_indexed(nesdev, 0x00005008, 0x1F1F1F1F);	
> +	nes_write_indexed(nesdev, 0x00005010, 0x1F1F1F1F);	
> +	nes_write_indexed(nesdev, 0x00005018, 0x1F1F1F1F);	
> +	nes_write_indexed(nesdev, 0x00005020, 0x1F1F1F1F);	
> +	nes_write_indexed(nesdev, 0x00006090, 0xFFFFFFFF);	
> +
> +	/* TODO: move this to code, get from EEPROM */
> +	nes_write_indexed(nesdev, 0x00000900, 0x20000001);	
> +	nes_write_indexed(nesdev, 0x000060C0, 0x0000028e);	
> +	nes_write_indexed(nesdev, 0x000060C8, 0x00000020);	
> +
> +	nes_write_indexed(nesdev, 0x000001EC, 0x5b2625a0);	
> +	/* nes_write_indexed(nesdev, 0x000001EC, 0x5f2625a0); */  
> +	
> +}
> +
> +
> +/**
> + * nes_destroy_adapter - destroy the adapter structure
> + */
> +void nes_destroy_adapter(struct nes_adapter *nesadapter)
> +{
> +	struct nes_adapter *tmp_adapter;
> +
> +	list_for_each_entry(tmp_adapter, &nes_adapter_list, list) {
> +		dprintk("%s: Nes Adapter list entry = 0x%p.\n", __FUNCTION__, tmp_adapter);
> +	}
> +
> +	nesadapter->ref_count--;
> +	if (!nesadapter->ref_count) {
> +			del_timer(&nesadapter->mh_timer);
> +
> +		dprintk("nes_destroy_adapter: Deleting adapter from adapter list.\n");
> +		list_del(&nesadapter->list);
> +		dprintk("nes_destroy_adapter: Freeing adapter structure.\n");
> +		kfree(nesadapter);
> +	}
> +	dprintk("%s: Done.\n", __FUNCTION__);
> +}
> +
> +
> +/**
> + * nes_init_cqp
> + */
> +int nes_init_cqp(struct nes_device *nesdev)
> +{
> +	struct nes_adapter *nesadapter = nesdev->nesadapter;
> +	struct nes_hw_cqp_qp_context *cqp_qp_context;
> +	struct nes_hw_cqp_wqe *cqp_wqe;
> +	struct nes_hw_ceq *ceq;
> +	struct nes_hw_ceq *nic_ceq;
> +	struct nes_hw_aeq *aeq;
> +	void *vmem;
> +	dma_addr_t pmem;
> +	u32 count=0;
> +	u32 cqp_head;
> +	u64 u64temp;
> +	u32 u32temp;
> +
> +#define NES_NIC_CEQ_SIZE 8
> +/* NICs will be on a separate CQ */
> +#define NES_CCEQ_SIZE ((nesadapter->max_cq / nesadapter->port_count) - 32)
> +
> +	/* allocate CQP memory */
> +	/* Need to add max_cq to the aeq size once cq overflow checking is added back */
> +	/* SQ is 512 byte aligned, others are 256 byte aligned */
> +	nesdev->cqp_mem_size = 512 +
> +			(sizeof(struct nes_hw_cqp_wqe) * NES_CQP_SQ_SIZE) +
> +			(sizeof(struct nes_hw_cqe) * NES_CCQ_SIZE) +
> +			max(((u32)sizeof(struct nes_hw_ceqe) * NES_CCEQ_SIZE), (u32)256) +
> +			max(((u32)sizeof(struct nes_hw_ceqe) * NES_NIC_CEQ_SIZE), (u32)256) +
> +			(sizeof(struct nes_hw_aeqe) * nesadapter->max_qp) +
> +			sizeof(struct nes_hw_cqp_qp_context);
> +
> +	nesdev->cqp_vbase = pci_alloc_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
> +			&nesdev->cqp_pbase);
> +	if (!nesdev->cqp_vbase) {
> +		dprintk(KERN_ERR PFX "Unable to allocate memory for host descriptor rings\n");
> +		return(-ENOMEM);
> +	}
> +	memset(nesdev->cqp_vbase, 0, nesdev->cqp_mem_size);
> +
> +	/* Allocate a twice the number of CQP requests as the SQ size */
> +	nesdev->nes_cqp_requests = kmalloc(sizeof(struct nes_cqp_request) *
> +			2 * NES_CQP_SQ_SIZE, GFP_KERNEL);

use kzalloc() here instead of kmalloc() followed by memset()


> +	if (NULL == nesdev->nes_cqp_requests) {

kernel standard:  variable comes first, then comparison constant

or just 'if (!foo)'


> +		dprintk(KERN_ERR PFX "Unable to allocate memory CQP request entries.\n");
> +		pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
> +				nesdev->cqp.sq_pbase);
> +		return(-ENOMEM);
> +	}
> +	memset(nesdev->nes_cqp_requests, 0, sizeof(struct nes_cqp_request) *
> +			2 * NES_CQP_SQ_SIZE);
> +	dprintk("Allocated CQP structures at %p (phys = %016lX), size = %u.\n",
> +			nesdev->cqp_vbase, (unsigned long)nesdev->cqp_pbase, nesdev->cqp_mem_size);
> +
> +	spin_lock_init(&nesdev->cqp.lock);
> +	init_waitqueue_head(&nesdev->cqp.waitq);
> +
> +	/* Setup Various Structures */
> +	vmem = (void *)(((unsigned long long)nesdev->cqp_vbase + (512 - 1)) &
> +			~(unsigned long long)(512 - 1));
> +	pmem = (dma_addr_t)(((unsigned long long)nesdev->cqp_pbase + (512 - 1)) &
> +			~(unsigned long long)(512 - 1));
> +
> +	nesdev->cqp.sq_vbase = vmem;
> +	nesdev->cqp.sq_pbase = pmem;
> +	nesdev->cqp.sq_size = NES_CQP_SQ_SIZE;
> +	nesdev->cqp.sq_head = 0;
> +	nesdev->cqp.sq_tail = 0;
> +	nesdev->cqp.qp_id = PCI_FUNC(nesdev->pcidev->devfn);

kill


> +	dprintk("CQP at %p (phys = %016lX).\n",
> +			nesdev->cqp.sq_vbase,	(unsigned long)nesdev->cqp.sq_pbase);
> +
> +	vmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
> +	pmem += (sizeof(struct nes_hw_cqp_wqe) * nesdev->cqp.sq_size);
> +
> +	nesdev->ccq.cq_vbase = vmem;
> +	nesdev->ccq.cq_pbase = pmem;
> +	nesdev->ccq.cq_size = NES_CCQ_SIZE;
> +	nesdev->ccq.cq_head = 0;
> +	nesdev->ccq.ce_handler = nes_cqp_ce_handler;
> +	nesdev->ccq.cq_number = PCI_FUNC(nesdev->pcidev->devfn);

kill


> +	dprintk("CCQ at %p (phys = %016lX).\n",
> +			nesdev->ccq.cq_vbase, (unsigned long)nesdev->ccq.cq_pbase);
> +
> +	vmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
> +	pmem += (sizeof(struct nes_hw_cqe) * nesdev->ccq.cq_size);
> +
> +	nesdev->ceq_index = PCI_FUNC(nesdev->pcidev->devfn);
> +	ceq = &nesadapter->ceq[nesdev->ceq_index];
> +	ceq->ceq_vbase = vmem;
> +	ceq->ceq_pbase = pmem;
> +	ceq->ceq_size = NES_CCEQ_SIZE;
> +	ceq->ceq_head = 0;
> +	dprintk("CEQ at %p (phys = %016lX).\n",
> +			ceq->ceq_vbase, (unsigned long)ceq->ceq_pbase);
> +
> +	vmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
> +	pmem += max(((u32)sizeof(struct nes_hw_ceqe) * ceq->ceq_size), (u32)256);
> +
> +	nesdev->nic_ceq_index = PCI_FUNC(nesdev->pcidev->devfn) + 8;
> +	nic_ceq = &nesadapter->ceq[nesdev->nic_ceq_index];
> +	nic_ceq->ceq_vbase = vmem;
> +	nic_ceq->ceq_pbase = pmem;
> +	nic_ceq->ceq_size = NES_NIC_CEQ_SIZE;
> +	nic_ceq->ceq_head = 0;
> +	dprintk("NIC CEQ at %p (phys = %016lX).\n",
> +			nic_ceq->ceq_vbase, (unsigned long)nic_ceq->ceq_pbase);
> +
> +	vmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
> +	pmem += max(((u32)sizeof(struct nes_hw_ceqe) * nic_ceq->ceq_size), (u32)256);
> +
> +	aeq = &nesadapter->aeq[PCI_FUNC(nesdev->pcidev->devfn)];
> +	aeq->aeq_vbase = vmem;
> +	aeq->aeq_pbase = pmem;
> +	aeq->aeq_size = nesadapter->max_qp;
> +	aeq->aeq_head = 0;
> +	dprintk("AEQ  at %p (phys = %016lX).\n",
> +			aeq->aeq_vbase, (unsigned long)aeq->aeq_pbase);
> +
> +	/* Setup QP Context */
> +	vmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
> +	pmem += (sizeof(struct nes_hw_aeqe) * aeq->aeq_size);
> +
> +	cqp_qp_context = vmem;
> +	cqp_qp_context->context_words[0] = (PCI_FUNC(nesdev->pcidev->devfn) << 12) + (2 << 10);
> +	cqp_qp_context->context_words[1] = 0;
> +	cqp_qp_context->context_words[2] = (u32)nesdev->cqp.sq_pbase;
> +	cqp_qp_context->context_words[3] = ((u64)nesdev->cqp.sq_pbase) >> 32;
> +
> +	dprintk("Address of CQP Context = %p.\n", cqp_qp_context);
> +	for (count=0;count<4 ; count++) {
> +		dprintk("CQP Context, Line %u = %08X.\n",
> +				count, cqp_qp_context->context_words[count]);
> +	}
> +
> +	/* Write the address to Create CQP */
> +	if ((sizeof(dma_addr_t) > 4)) {
> +		nes_write_indexed(nesdev,
> +				NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
> +				((u64)pmem) >> 32);
> +	} else {
> +		nes_write_indexed(nesdev,
> +				NES_IDX_CREATE_CQP_HIGH + (PCI_FUNC(nesdev->pcidev->devfn) * 8), 0);
> +	}
> +	nes_write_indexed(nesdev,
> +			NES_IDX_CREATE_CQP_LOW + (PCI_FUNC(nesdev->pcidev->devfn) * 8),
> +			(u32)pmem);
> +
> +	dprintk("Address of CQP SQ = %p.\n", nesdev->cqp.sq_vbase);
> +
> +	INIT_LIST_HEAD(&nesdev->cqp_avail_reqs);
> +	INIT_LIST_HEAD(&nesdev->cqp_pending_reqs);
> +
> +	for (count=0; count<2*NES_CQP_SQ_SIZE; count++) {
> +		init_waitqueue_head(&nesdev->nes_cqp_requests[count].waitq);
> +		list_add_tail(&nesdev->nes_cqp_requests[count].list, &nesdev->cqp_avail_reqs);
> +		/* dprintk("Adding cqp request %p to the available list \n",
> +				&nesdev->nes_cqp_requests[count]); */
> +	}
> +
> +	/* Write Create CCQ WQE */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CQ | NES_CQP_CQ_CEQ_VALID |
> +			NES_CQP_CQ_CHK_OVERFLOW | ((u32)nesdev->ccq.cq_size << 16));
> +	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesdev->ccq.cq_number |
> +			((u32)nesdev->ceq_index<<16));
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
> +	*((struct nes_hw_cqp **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]) = &nesdev->cqp;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
> +	u64temp = (u64)nesdev->ccq.cq_pbase;
> +	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_PBL_LOW_IDX] = cpu_to_le32((u32)u64temp);
> +	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
> +	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX] = 0;
> +	/* TODO: the following 2 lines likely have endian issues */
> +	*((struct nes_hw_cq **)&cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX]) = &nesdev->ccq;
> +	*((u64 *)&cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX]) >>= 1;
> +	dprintk("%s: CQ%u context = 0x%08X:0x%08X.\n", __FUNCTION__, nesdev->ccq.cq_number,
> +			cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_HIGH_IDX],
> +			cqp_wqe->wqe_words[NES_CQP_CQ_WQE_CQ_CONTEXT_LOW_IDX]);
> +
> +	cqp_wqe->wqe_words[NES_CQP_CQ_WQE_DOORBELL_INDEX_HIGH_IDX] = 0;
> +
> +	/* Write Create CEQ WQE */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CEQ +
> +			((u32)nesdev->ceq_index << 8));
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(ceq->ceq_size);
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
> +	*((struct nes_hw_cqp **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]) = &nesdev->cqp;
> +	*((struct nes_cqp_request **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX]) = NULL;
> +	u64temp = (u64)ceq->ceq_pbase;
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_PBL_LOW_IDX] = cpu_to_le32((u32)u64temp);
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
> +
> +	/* Write Create AEQ WQE */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_AEQ +
> +			((u32)PCI_FUNC(nesdev->pcidev->devfn) << 8));
> +	cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(aeq->aeq_size);
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
> +	*((struct nes_hw_cqp **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]) = &nesdev->cqp;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
> +	u64temp = (u64)aeq->aeq_pbase;
> +	cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_PBL_LOW_IDX] = cpu_to_le32((u32)u64temp);
> +	cqp_wqe->wqe_words[NES_CQP_AEQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));
> +
> +	/* Write Create CEQ WQE */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_CREATE_CEQ +
> +			((u32)nesdev->nic_ceq_index << 8));
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_ELEMENT_COUNT_IDX] = cpu_to_le32(nic_ceq->ceq_size);
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
> +	*((struct nes_hw_cqp **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]) = &nesdev->cqp;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
> +	u64temp = (u64)nic_ceq->ceq_pbase;
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_PBL_LOW_IDX] = cpu_to_le32((u32)u64temp);
> +	cqp_wqe->wqe_words[NES_CQP_CEQ_WQE_PBL_HIGH_IDX] = cpu_to_le32((u32)(u64temp >> 32));

remove all these pointless u32 casts


> +	/* Poll until CCQP done */
> +	count = 0;
> +	do {
> +		if (count++ > 1000) {
> +			printk(KERN_ERR PFX "Error creating CQP\n");
> +			pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
> +					nesdev->cqp_vbase, nesdev->cqp_pbase);

consolidate duplicated error handling code


> +			return(-1);
> +		}
> +		udelay(10);
> +	} while (!(nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn) * 8)) & (1 << 8)));
> +
> +	dprintk("CQP Status = 0x%08X\n", nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
> +
> +	u32temp = 0x04800000;
> +	nes_write32(nesdev->regs+NES_WQE_ALLOC, u32temp | nesdev->cqp.qp_id);
> +
> +	/* wait for the CCQ, CEQ, and AEQ to get created */
> +	count = 0;
> +	do {
> +		if (count++ > 1000) {
> +			printk(KERN_ERR PFX "Error creating CCQ, CEQ, and AEQ\n");
> +			pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size,
> +					nesdev->cqp_vbase, nesdev->cqp_pbase);
> +			return(-1);
> +		}
> +		udelay(10);
> +	} while (((nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != (15<<8)));
> +
> +	/* dump the QP status value */
> +	dprintk("QP Status = 0x%08X\n", nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
> +
> +	nesdev->cqp.sq_tail++;
> +
> +	return (0);
> +}
> +
> +
> +/**
> + * nes_destroy_cqp
> + */
> +int nes_destroy_cqp(struct nes_device *nesdev)
> +{
> +	struct nes_hw_cqp_wqe *cqp_wqe;
> +	u32 count=0;
> +	u32 cqp_head;
> +	unsigned long flags;
> +
> +	dprintk("Waiting for CQP work to complete.\n");
> +	do {
> +		if (count++ > 1000)	break;
> +		udelay(10);
> +	} while (!(nesdev->cqp.sq_head == nesdev->cqp.sq_tail));
> +
> +	/* Reset CCQ */
> +	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_RESET |
> +			nesdev->ccq.cq_number);
> +
> +	/* Disable device interrupts */
> +	nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
> +	/* Destroy the AEQ */
> +	spin_lock_irqsave(&nesdev->cqp.lock, flags);
> +	cqp_head = nesdev->cqp.sq_head++;
> +	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_AEQ |
> +			((u32)PCI_FUNC(nesdev->pcidev->devfn)<<8));
> +	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
> +	/* Destroy the NIC CEQ */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
> +			((u32)nesdev->nic_ceq_index<<8));
> +	/* Destroy the CEQ */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_CEQ |
> +			(nesdev->ceq_index<<8));
> +	/* Destroy the CCQ */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =  cpu_to_le32(NES_CQP_DESTROY_CQ);
> +	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32( nesdev->ccq.cq_number ||
> +			((u32)nesdev->ceq_index<<16));
> +	/* Destroy CQP */
> +	cqp_head = nesdev->cqp.sq_head++;
> +	nesdev->cqp.sq_head &= nesdev->cqp.sq_size-1;
> +	cqp_wqe = &nesdev->cqp.sq_vbase[cqp_head];
> +	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] = cpu_to_le32(NES_CQP_DESTROY_QP |
> +			NES_CQP_QP_TYPE_CQP);
> +	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] =  cpu_to_le32(nesdev->cqp.qp_id);
> +
> +	barrier();

barrier() is insufficient here (it is only a compiler barrier)


> +	/* Ring doorbell (4 WQEs) */
> +	nes_write32(nesdev->regs+NES_WQE_ALLOC, 0x05800000 | nesdev->cqp.qp_id);
> +
> +	/* Wait for the destroy to complete */
> +	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
> +
> +	/* wait for the CCQ, CEQ, and AEQ to get destroyed */
> +	count = 0;
> +	do {
> +		if (count++ > 1000) {
> +			printk(KERN_ERR PFX "Function%d: Error destroying CCQ, CEQ, and AEQ\n",
> +					PCI_FUNC(nesdev->pcidev->devfn));
> +			break;
> +		}
> +		udelay(10);
> +	} while (((nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL + (PCI_FUNC(nesdev->pcidev->devfn)*8)) & (15<<8)) != 0));
> +
> +	/* dump the QP status value */
> +	dprintk("Function%d: QP Status = 0x%08X\n",
> +			PCI_FUNC(nesdev->pcidev->devfn),
> +			nes_read_indexed(nesdev,
> +			NES_IDX_QP_CONTROL+(PCI_FUNC(nesdev->pcidev->devfn)*8)));
> +
> +	kfree(nesdev->nes_cqp_requests);
> +
> +	/* Free the control structures */
> +	pci_free_consistent(nesdev->pcidev, nesdev->cqp_mem_size, nesdev->cqp.sq_vbase,
> +			nesdev->cqp.sq_pbase);
> +
> +	return (0);
> +}
> +
> +
> +/**
> + * nes_init_phy
> + */
> +int nes_init_phy(struct nes_device *nesdev)
> +{
> +	struct nes_adapter *nesadapter = nesdev->nesadapter;
> +	u32 counter = 0;
> +	u32 mac_index = nesdev->mac_index;
> +	u16 phy_data;
> +
> +	if (nesadapter->OneG_Mode) {
> +		dprintk("1G PHY, mac_index = %d.\n", mac_index);
> +		nes_read_1G_phy_reg(nesdev, 1, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 1 phy address %u = 0x%X.\n",
> +				nesadapter->phy_index[mac_index], phy_data);
> +
> +		nes_write_1G_phy_reg(nesdev, 23, nesadapter->phy_index[mac_index],  0xb000);
> +
> +		/* Reset the PHY */
> +		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], 0x8000);
> +		udelay(100);
> +		counter = 0;
> +		do {
> +			nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
> +			dprintk("Phy data from register 0 = 0x%X.\n", phy_data);
> +			if (counter++ > 100) break;
> +		} while (phy_data & 0x8000);
> +
> +		/* Setting no phy loopback */
> +		phy_data &= 0xbfff;
> +		phy_data |= 0x1140;
> +		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index],  phy_data);
> +		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0 = 0x%X.\n", phy_data);
> +
> +		nes_read_1G_phy_reg(nesdev, 0x17, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x17 = 0x%X.\n", phy_data);
> +
> +		nes_read_1G_phy_reg(nesdev, 0x1e, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x1e = 0x%X.\n", phy_data);
> +
> +		/* Setting the interrupt mask */
> +		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x19 = 0x%X.\n", phy_data);
> +		nes_write_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], 0xffee);
> +
> +		nes_read_1G_phy_reg(nesdev, 0x19, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x19 = 0x%X.\n", phy_data);
> +
> +		/* turning on flow control */
> +		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x4 = 0x%X.\n", phy_data);
> +		nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
> +				(phy_data & ~(0x03E0)) | 0xc00);
> +		/* nes_write_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index],
> +				phy_data | 0xc00); */
> +		nes_read_1G_phy_reg(nesdev, 4, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x4 = 0x%X.\n", phy_data);
> +
> +		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x9 = 0x%X.\n", phy_data);
> +		/* Clear Half duplex */
> +		nes_write_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index],
> +				phy_data & ~(0x0100));
> +		nes_read_1G_phy_reg(nesdev, 9, nesadapter->phy_index[mac_index], &phy_data);
> +		dprintk("Phy data from register 0x9 = 0x%X.\n", phy_data);
> +
> +		nes_read_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], &phy_data);
> +		nes_write_1G_phy_reg(nesdev, 0, nesadapter->phy_index[mac_index], phy_data | 0x0300);

Move all of this code down one indentation level: adjust the test at the
top of the function so that it returns early, or use a goto.

I stopped reviewing here.  please do all style changes, so we can review 
this driver in depth.


^ permalink raw reply

* Re: [PATCH 5/14] nes: context structures and defines
From: Jeff Garzik @ 2007-08-08  1:50 UTC (permalink / raw)
  To: ggrundstrom; +Cc: rdreier, ewg, netdev
In-Reply-To: <200708080103.l7813b7E004778@neteffect.com>

ggrundstrom@neteffect.com wrote:
> QP context structures and defines
> 
> Signed-off-by: Glenn Grundstrom <ggrundstrom@neteffect.com>
> ---
> diff -Nurp NULL ofa_kernel-1.2/drivers/infiniband/hw/nes/nes_context.h
> --- NULL	1969-12-31 18:00:00.000000000 -0600
> +++ ofa_kernel-1.2/drivers/infiniband/hw/nes/nes_context.h	2007-08-06 20:09:04.000000000 -0500
> @@ -0,0 +1,193 @@
> +/*
> + * Copyright (c) 2006 NetEffect, Inc. All rights reserved.
> + *
> + * This software is available to you under a choice of one of two
> + * licenses.  You may choose to be licensed under the terms of the GNU
> + * General Public License (GPL) Version 2, available from the file
> + * COPYING in the main directory of this source tree, or the
> + * OpenIB.org BSD license below:
> + *
> + *     Redistribution and use in source and binary forms, with or
> + *     without modification, are permitted provided that the following
> + *     conditions are met:
> + *
> + *      - Redistributions of source code must retain the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer.
> + *
> + *      - Redistributions in binary form must reproduce the above
> + *        copyright notice, this list of conditions and the following
> + *        disclaimer in the documentation and/or other materials
> + *        provided with the distribution.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#ifndef NES_CONTEXT_H
> +#define NES_CONTEXT_H
> +
> +struct nes_qp_context {
> +	u32   misc;
> +	u32   cqs;
> +	u32   sq_addr_low;
> +	u32   sq_addr_high;
> +	u32   rq_addr_low;
> +	u32   rq_addr_high;
> +	u32   misc2;
> +	u32   tcpPorts;
> +	u32   ip0;
> +	u32   ip1;
> +	u32   ip2;
> +	u32   ip3;
> +	u32   mss;
> +	u32   arp_index_vlan;
> +	u32   tcp_state_flow_label;
> +	u32   pd_index_wscale;
> +	u32   keepalive;
> +	u32   ts_recent;
> +	u32   ts_age;
> +	u32   snd_nxt;
> +	u32   snd_wnd;
> +	u32   rcv_nxt;
> +	u32   rcv_wnd;
> +	u32   snd_max;
> +	u32   snd_una;
> +	u32   srtt;
> +	u32   rttvar;
> +	u32   ssthresh;
> +	u32   cwnd;
> +	u32   snd_wl1;
> +	u32   snd_wl2;
> +	u32   max_snd_wnd;
> +	u32   ts_val_delta;
> +	u32   retransmit;
> +	u32   probe_cnt;
> +	u32   hte_index;
> +	u32   q2_addr_low;
> +	u32   q2_addr_high;
> +	u32   ird_index;
> +	u32   Rsvd3;
> +	u32   ird_ord_sizes;
> +	u32   mrkr_offset;
> +	u32   aeq_token_low;
> +	u32   aeq_token_high;

I presume this needs the __le32, etc., types?

I presume you have not yet run this through sparse?  Please read and 
follow Documentation/sparse.txt

	Jeff




^ permalink raw reply

* Re: [PATCH 4/14] nes: connection manager structures and defines
From: Jeff Garzik @ 2007-08-08  1:49 UTC (permalink / raw)
  To: ggrundstrom; +Cc: rdreier, ewg, netdev
In-Reply-To: <200708080052.l780qUXw004708@neteffect.com>

ggrundstrom@neteffect.com wrote:
> +struct ietf_mpa_frame {
> +	u8 key[IETF_MPA_KEY_SIZE];
> +	u8 flags;
> +	u8 rev;
> +	u16 priv_data_len;
> +	u8 priv_data[0];

use unsigned long, not u8, for proper alignment.

plus, as noted in other emails, you should not ever be using 'u8' for 
pointers to raw data.  We use 'void *' for that in the kernel.



> +/* CM context params */
> +struct nes_cm_tcp_context {
> +	u8   client;		
> +
> +	u32  loc_seq_num;	
> +	u32  loc_ack_num;	
> +	u32  rem_ack_num;	
> +	u32  rcv_nxt;		
> +
> +	u32  loc_id;
> +	u32  rem_id;
> +
> +	u32 snd_wnd;
> +	u32 max_snd_wnd;
> +
> +	u32 rcv_wnd;
> +	u32 mss;
> +	u8  snd_wscale;
> +	u8  rcv_wscale;
> +
> +	struct nes_cm_tsa_context tsa_cntxt;	
> +	struct timeval sent_ts;		
> +};
> +
> +struct nes_cm_listener {
> +	struct list_head list;
> +	u64 session_id;	
> +	struct nes_cm_core *core_p;
> +	u8 loc_mac[ETH_ALEN];
> +	nes_addr_t loc_addr;
> +	u16 loc_port;
> +	void *cm_id;
> +	enum nes_cm_conn_type conn_type;
> +	atomic_t ref_count;
> +};
> +
> +/* per connection node and node state information */
> +struct nes_cm_node {
> +	u64 session_id;	
> +	u32 hashkey;	
> +
> +	nes_addr_t loc_addr, rem_addr;
> +	u16 loc_port, rem_port;
> +
> +	u8 loc_mac[ETH_ALEN];	
> +	u8 rem_mac[ETH_ALEN];	
> +
> +	enum nes_cm_node_state state;	
> +	struct nes_cm_tcp_context tcp_cntxt;	
> +	struct nes_cm_core *core_p;
> +	struct sk_buff_head resend_list;
> +	struct nes_cm_node *listener;
> +	atomic_t ref_count;				
> +	struct net_device *netdev_p;
> +
> +	struct nes_cm_node  *loopbackpartner ;
> +	struct list_head retrans_list;
> +	spinlock_t retrans_list_lock;
> +	struct list_head recv_list;
> +	spinlock_t recv_list_lock;
> +	
> +	int send_write0;
> +	union {
> +		struct ietf_mpa_frame mpa_frame_p;	
> +		u8 mpa_frame_b[NES_CM_DEFAULT_MTU];
> +	};
> +	u16 mpa_frame_size;
> +	void *cm_id;
> +	struct list_head list;
> +	int accelerated;
> +	struct nes_cm_listener *listen_p;
> +	enum nes_cm_conn_type conn_type;

use tabs to make your structs reviewable, like you did with the context 
params


> +/* structure for client or CM to fill when making CM api calls. */
> +/*	- only need to set relevant data, based on op. */
> +struct nes_cm_info {
> +	union {
> +		struct iw_cm_id *cm_id;
> +		struct net_device *netdev;
> +	};
> +
> +	u16 loc_port;
> +	u16 rem_port;
> +	nes_addr_t loc_addr;
> +	nes_addr_t rem_addr;
> +
> +	enum nes_cm_conn_type  conn_type;
> +};
> +
> +/* CM event codes */
> +enum  nes_cm_event_type {
> +	NES_CM_EVENT_UNKNOWN,
> +	NES_CM_EVENT_ESTABLISHED,
> +	NES_CM_EVENT_MPA_REQ,
> +	NES_CM_EVENT_MPA_CONNECT,
> +	NES_CM_EVENT_MPA_ACCEPT,
> +	NES_CM_EVENT_MPA_ESTABLISHED,
> +	NES_CM_EVENT_CONNECTED,
> +	NES_CM_EVENT_CLOSED,
> +	NES_CM_EVENT_RESET,
> +	NES_CM_EVENT_DROPPED_PKT,
> +	NES_CM_EVENT_CLOSE_IMMED,
> +	NES_CM_EVENT_CLOSE_HARD,
> +	NES_CM_EVENT_CLOSE_CLEAN,
> +	NES_CM_EVENT_ABORTED,
> +	NES_CM_EVENT_SEND_FIRST
> +};
> +
> +/* event to post to CM event handler */
> +struct nes_cm_event {
> +	enum nes_cm_event_type type;
> +
> +	struct nes_cm_info cm_info;
> +	struct work_struct event_work;
> +	struct nes_cm_node *node_p;
> +};
> +
> +struct nes_cm_core {
> + 	enum nes_cm_node_state state;	
> +	atomic_t session_id;			
> +
> +	atomic_t listen_node_cnt;			
> +	struct nes_cm_node listen_list;	
> +	spinlock_t listen_list_lock;	
> +
> +	u32 mtu;						
> +	u32 free_tx_pkt_max;
> +	u32 rx_pkt_posted;				
> +	struct sk_buff_head tx_free_list;	
> +	atomic_t ht_node_cnt;			
> +	struct list_head connected_nodes;
> +	/* struct list_head hashtable[NES_CM_HASHTABLE_SIZE]; */
> +	spinlock_t ht_lock;				
> +
> +	struct timer_list tcp_timer;	
> +
> +	struct nes_cm_ops *api;			
> +
> +	int (*post_event)(struct nes_cm_event *event_p);
> +	atomic_t events_posted;
> +	struct workqueue_struct *event_wq;
> +	struct workqueue_struct *disconn_wq;
> +
> +	atomic_t node_cnt;
> +	u64 aborted_connects;
> +	u32 options;
> +
> +	struct nes_cm_node *current_listen_node;
> +};
> +
> +
> +#define NES_CM_SET_PKT_SIZE        (1 << 1)
> +#define NES_CM_SET_FREE_PKT_Q_SIZE (1 << 2)
> +
> +/* CM ops/API for client interface */
> +struct nes_cm_ops {
> +	int (*accelerated)(struct nes_cm_core *cm_core_p,
> +			struct nes_cm_node *node_p);
> +	struct nes_cm_listener * (*listen)(struct nes_cm_core *cm_core_p,
> +			struct nes_vnic *nesvnic, struct nes_cm_info *nfo_p);
> +	int (*stop_listener)(struct nes_cm_core *core_p,
> +			struct nes_cm_listener *cm_core_p);
> +	struct nes_cm_node * (*connect)(struct nes_cm_core *cm_core_p,
> +			struct nes_vnic *nesvnic, struct ietf_mpa_frame *mpa_frame_p,
> +			struct nes_cm_info *nfo_p);
> +	int (*close)(struct nes_cm_core *cm_core_p, struct nes_cm_node *node_p);
> +	int (*accept)(struct nes_cm_core *cm_core_p, struct ietf_mpa_frame *mpa_frame_p,
> +			struct nes_cm_node *node_p);
> +	int (*reject)(struct nes_cm_core *cm_core_p, struct ietf_mpa_frame *mpa_frame_p,
> +			struct nes_cm_node *node_p);
> +	int (*recv_pkt)(struct nes_cm_core *cm_core_p, struct nes_vnic *nesvnic,
> +			struct sk_buff *skb_p);
> +	int (*destroy_cm_core)(struct nes_cm_core *core_p);
> +	int (*get)(struct nes_cm_core *cm_core_p);
> +	int (*set)(struct nes_cm_core *core_p, u32 type, u32 value);
> +};

how many users of this interface will be in the kernel, assuming your 
submission is accepted?


^ permalink raw reply

* [PATCH 1/14] nes: module and device initialization
From: root @ 2007-08-08  0:38 UTC (permalink / raw)
  To: rdreier; +Cc: ewg, ggrundstrom, netdev

Kernel module and device initialization routines.

Signed-off-by: Glenn Grundstrom <ggrundstrom@neteffect.com>
---
diff -Nurp NULL ofa_kernel-1.2/drivers/infiniband/hw/nes/nes.c
--- NULL	1969-12-31 18:00:00.000000000 -0600
+++ ofa_kernel-1.2/drivers/infiniband/hw/nes/nes.c	2007-08-06 20:09:04.000000000 -0500
@@ -0,0 +1,833 @@
+/*
+ * Copyright (c) 2006 - 2007 NetEffect, Inc. All rights reserved.
+ * Copyright (c) 2005 Open Grid Computing, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/if_vlan.h>
+#include <linux/crc32.h>
+#include <linux/in.h>
+#include <linux/fs.h>
+#include <linux/init.h>
+#include <linux/if_arp.h>
+#include <asm/io.h>
+#include <asm/irq.h>
+#include <asm/byteorder.h>
+#include <rdma/ib_smi.h>
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_pack.h>
+#include <rdma/iw_cm.h>
+
+#include "nes.h"
+
+#include <net/netevent.h>
+#include <net/neighbour.h>
+#include <linux/route.h>
+#include <net/ip_fib.h>
+
+#ifdef SPIN_BUG_ON
+#undef SPIN_BUG_ON
+#define SPIN_BUG_ON (...)
+#endif
+
+MODULE_AUTHOR("NetEffect");
+MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(DRV_VERSION);
+
+/* Number of QPs whose last reference has been dropped (see nes_rem_ref). */
+atomic_t qps_destroyed;
+
+int max_mtu = 9000;
+int nics_per_function = 1;
+
+/* Copied into nesdev->et_rx_coalesce_usecs_irq when a device is probed. */
+#ifdef NES_INT_MODERATE
+int interrupt_mod_interval = 128;
+#else
+int interrupt_mod_interval = 0;
+#endif
+
+/* Interoperability */
+int mpa_version = 1;
+module_param(mpa_version, int, 0);
+MODULE_PARM_DESC(mpa_version, "MPA version to be used in MPA Req/Resp (0 or 1)");
+
+/* Interoperability */
+int disable_mpa_crc = 0;
+module_param(disable_mpa_crc, int, 0);
+MODULE_PARM_DESC(disable_mpa_crc, "Disable checking of MPA CRC");
+
+/*
+ * The variables below are unsigned, so they are registered with the "uint"
+ * parameter type; registering them as "int" mismatches the type check that
+ * module_param() performs on the variable's address.
+ */
+unsigned int send_first = 0;
+module_param(send_first, uint, 0);
+MODULE_PARM_DESC(send_first, "Send RDMA Message First on Active Connection");
+
+
+/* Bit mask of NES_DRV_OPT_* flags (nes_probe tests NES_DRV_OPT_ENABLE_MSI). */
+unsigned int nes_drv_opt = 0;
+module_param(nes_drv_opt, uint, 0);
+MODULE_PARM_DESC(nes_drv_opt, "Driver option parameters");
+
+/* Mode 0644: readable by all and writable by root through sysfs at runtime. */
+unsigned int nes_debug_level = 0;
+module_param(nes_debug_level, uint, 0644);
+MODULE_PARM_DESC(nes_debug_level, "Enable debug output level");
+
+LIST_HEAD(nes_adapter_list);
+LIST_HEAD(nes_dev_list);	/* nes_probe appends each nes_device here; the notifier callbacks walk it */
+
+static void nes_print_macaddr(struct net_device *netdev);
+static irqreturn_t nes_interrupt(int, void *);
+static int __devinit nes_probe(struct pci_dev *, const struct pci_device_id *);
+static void __devexit nes_remove(struct pci_dev *);
+static int __init nes_init_module(void);
+static void __exit nes_exit_module(void);
+
+static struct pci_device_id nes_pci_table[] = {
+	{PCI_VENDOR_ID_NETEFFECT, PCI_DEVICE_ID_NETEFFECT_NE020, PCI_ANY_ID, PCI_ANY_ID},
+	{0}	/* all-zero entry terminates the table */
+};
+
+MODULE_DEVICE_TABLE(pci, nes_pci_table);
+
+static int nes_inetaddr_event(struct notifier_block *notifier, unsigned long event, void *ptr);
+static int nes_net_event(struct notifier_block *notifier, unsigned long event, void *ptr);
+static int notifiers_registered = 0;	/* set to 1 by nes_probe once both notifier blocks below are registered */
+
+
+static struct notifier_block nes_inetaddr_notifier = {
+	.notifier_call = nes_inetaddr_event	/* registered via register_inetaddr_notifier() */
+};
+
+static struct notifier_block nes_net_notifier = {
+	.notifier_call = nes_net_event	/* registered via register_netevent_notifier() */
+};
+
+
+
+
+/**
+ * nes_inetaddr_event - inetaddr notifier callback
+ * @notifier: notifier block the callback was registered through
+ * @event: NETDEV_UP or NETDEV_DOWN; any other value is ignored
+ * @ptr: the struct in_ifaddr the event is about
+ *
+ * Looks for the nes device whose first netdev owns the address.  If RDMA is
+ * enabled on it, the per-PCI-function destination IP register and the
+ * vnic's cached local_ipaddr are cleared (DOWN) or set (UP, only when no
+ * address is cached yet).
+ *
+ * Returns NOTIFY_OK when a matching device consumed the event, otherwise
+ * NOTIFY_DONE.
+ */
+static int nes_inetaddr_event(struct notifier_block *notifier,
+		unsigned long event, void *ptr)
+{
+	struct in_ifaddr *ifa = ptr;
+	struct net_device *event_netdev = ifa->ifa_dev->dev;
+	struct nes_device *nesdev;
+	struct net_device *netdev;
+	struct nes_vnic *nesvnic;
+	unsigned int addr;
+	unsigned int mask;
+
+	dprintk("nes_inetaddr_event: notifier %p event=%ld netdev=%p,"
+			" interface name = %s.\n",
+			notifier, event, event_netdev, event_netdev->name);
+
+	/* host-order copies, used for the debug trace only */
+	addr = ntohl(ifa->ifa_address);
+	mask = ntohl(ifa->ifa_mask);
+	dprintk("nes_inetaddr_event: ip address %08X, netmask %08X.\n", addr, mask);
+
+	list_for_each_entry(nesdev, &nes_dev_list, list) {
+		netdev = nesdev->netdev[0];
+		dprintk("Nesdev list entry = 0x%p. (%s) \n", nesdev, netdev->name);
+
+		if (netdev != event_netdev) {
+			continue;
+		}
+
+		nesvnic = netdev_priv(netdev);
+		if (!nesvnic->rdma_enabled) {
+			dprintk("%s: Returning without processing event for %s since"
+					" RDMA is not enabled. \n",
+					__FUNCTION__, netdev->name);
+			return NOTIFY_OK;
+		}
+
+		/* we have ifa->ifa_address/mask here if we need it */
+		if (event == NETDEV_DOWN) {
+			dprintk("event:DOWN \n");
+			nes_write_indexed(nesdev,
+					NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)), 0);
+			nesvnic->local_ipaddr = 0;
+			return NOTIFY_OK;
+		}
+
+		if (event == NETDEV_UP) {
+			dprintk("event:UP \n");
+
+			if (nesvnic->local_ipaddr != 0) {
+				dprintk("%s[%u] Interface already has local_ipaddr\n",
+						__FUNCTION__, __LINE__);
+				return NOTIFY_OK;
+			}
+
+			/* Add the address to the IP table */
+			nesvnic->local_ipaddr = ifa->ifa_address;
+			nes_write_indexed(nesdev,
+					NES_IDX_DST_IP_ADDR+(0x10*PCI_FUNC(nesdev->pcidev->devfn)),
+					ntohl(ifa->ifa_address));
+			return NOTIFY_OK;
+		}
+
+		/* any other event: nothing to do here, keep scanning the list */
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_net_event - netevent notifier callback
+ * @notifier: notifier block the callback was registered through
+ * @event: NETEVENT_* code
+ * @ptr: for NETEVENT_NEIGH_UPDATE, the struct neighbour that changed
+ *
+ * On a neighbour update for one of our RDMA-enabled netdevs, adds the
+ * neighbour to the adapter ARP cache when its state is NUD_VALID and
+ * deletes it otherwise.  All other events are only traced.
+ *
+ * Returns NOTIFY_OK when the neighbour's device is one of ours, otherwise
+ * NOTIFY_DONE.
+ */
+static int nes_net_event(struct notifier_block *notifier,
+		unsigned long event, void *ptr)
+{
+	struct neighbour *neigh = ptr;
+	struct nes_device *nesdev;
+	struct net_device *netdev;
+	struct nes_vnic *nesvnic;
+
+	if (event == NETEVENT_PMTU_UPDATE) {
+		dprintk("NETEVENT_PMTU_UPDATE:\n");
+		return NOTIFY_DONE;
+	}
+	if (event == NETEVENT_REDIRECT) {
+		dprintk("NETEVENT_REDIRECT:\n");
+		return NOTIFY_DONE;
+	}
+	if (event != NETEVENT_NEIGH_UPDATE) {
+		dprintk("NETEVENT_ %lu undefined\n", event);
+		return NOTIFY_DONE;
+	}
+
+	/* NETEVENT_NEIGH_UPDATE: find the device the neighbour lives on */
+	list_for_each_entry(nesdev, &nes_dev_list, list) {
+		netdev = nesdev->netdev[0];
+		if (netdev != neigh->dev) {
+			continue;
+		}
+
+		nesvnic = netdev_priv(netdev);
+		if (!nesvnic->rdma_enabled) {
+			dprintk("Skipping device %s since no RDMA\n", netdev->name);
+		} else {
+			/* valid neighbours are added, everything else is removed */
+			nes_manage_arp_cache(neigh->dev, neigh->ha,
+					ntohl(*(u32 *)neigh->primary_key),
+					(neigh->nud_state & NUD_VALID) ?
+					NES_ARP_ADD : NES_ARP_DELETE);
+		}
+		return NOTIFY_OK;
+	}
+
+	return NOTIFY_DONE;
+}
+
+
+/**
+ * nes_add_ref - take a reference on a QP
+ * @ibqp: verbs QP embedded in the nes_qp to pin
+ *
+ * Traces the current count and then increments nesqp->refcount.
+ */
+void nes_add_ref(struct ib_qp *ibqp)
+{
+	struct nes_qp *nesqp = to_nesqp(ibqp);
+
+	dprintk("%s: Bumping refcount for QP%u.  Pre-inc value = %u\n",
+			__FUNCTION__, ibqp->qp_num, atomic_read(&nesqp->refcount));
+
+	atomic_inc(&nesqp->refcount);
+}
+
+
+/**
+ * nes_rem_ref - drop a reference on a QP, destroying it on the last put
+ * @ibqp: verbs QP embedded in the nes_qp being released
+ *
+ * BUG()s if the count is already zero.  When the count reaches zero the
+ * QP's DMA memory is freed, its qp_table slot and QP id are released, a
+ * DESTROY_QP request is posted to the Control QP and the buffer holding the
+ * QP (nesqp->allocated_buffer) is freed.
+ *
+ * If no CQP request can be obtained the destroy request cannot be posted;
+ * the buffer is still freed so the failure does not also leak host memory.
+ */
+void nes_rem_ref(struct ib_qp *ibqp)
+{
+	u64 u64temp;
+	struct nes_qp *nesqp;
+	struct nes_vnic *nesvnic = to_nesvnic(ibqp->device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+	struct nes_hw_cqp_wqe *cqp_wqe;
+	struct nes_cqp_request *cqp_request;
+	unsigned long flags;
+
+	nesqp = to_nesqp(ibqp);
+
+	dprintk("%s: Decing refcount for QP%u.  Pre-dec value = %u\n",
+			__FUNCTION__, ibqp->qp_num, atomic_read(&nesqp->refcount) );
+	if (atomic_read(&nesqp->refcount) == 0) {
+		printk(KERN_INFO PFX "%s: Reference count already 0 for QP%d, last aeq = 0x%04X.\n",
+				__FUNCTION__, ibqp->qp_num, nesqp->last_aeq );
+		BUG();
+	}
+
+	/* Nothing more to do unless this was the last reference */
+	if (!atomic_dec_and_test(&nesqp->refcount))
+		return;
+
+	dprintk("%s: Refcount for QP%u is 0. Freeing QP structure, nesadapter = %p\n",
+			__FUNCTION__, ibqp->qp_num, nesadapter);
+	atomic_inc(&qps_destroyed);
+
+	/* Free the control structures */
+	pci_free_consistent(nesdev->pcidev, nesqp->qp_mem_size, nesqp->hwqp.sq_vbase,
+			nesqp->hwqp.sq_pbase);
+
+	nesadapter->qp_table[nesqp->hwqp.qp_id-NES_FIRST_QPN] = NULL;
+	nes_free_resource(nesadapter, nesadapter->allocated_qps, nesqp->hwqp.qp_id);
+
+	/* Destroy the QP */
+	spin_lock_irqsave(&nesdev->cqp.lock, flags);
+	cqp_request = nes_get_cqp_request(nesdev, NES_CQP_REQUEST_HOLDING_LOCK);
+	if (NULL == cqp_request) {
+		dprintk("%s: Failed to get a cqp_request.\n", __FUNCTION__);
+		spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+		/* cannot post the destroy, but must not leak the QP buffer */
+		goto free_qp;
+	}
+	cqp_request->waiting = 0;
+	cqp_wqe = &cqp_request->cqp_wqe;
+
+	cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] =
+			cpu_to_le32(NES_CQP_DESTROY_QP | NES_CQP_QP_TYPE_IWARP);
+
+	if (nesqp->hte_added) {
+		dprintk("%s:%u: set CQP_QP_DEL_HTE\n", __FUNCTION__, __LINE__);
+		cqp_wqe->wqe_words[NES_CQP_WQE_OPCODE_IDX] |=  cpu_to_le32(NES_CQP_QP_DEL_HTE);
+		nesqp->hte_added = 0;
+	}
+
+	cqp_wqe->wqe_words[NES_CQP_WQE_ID_IDX] = cpu_to_le32(nesqp->hwqp.qp_id);
+	/*
+	 * The completion context is stored as a raw pointer starting at the LOW
+	 * word; the HIGH word is zeroed first.
+	 * NOTE(review): presumably so 32-bit builds, where the pointer fills
+	 * only one word, leave HIGH at zero - confirm the two words are adjacent.
+	 */
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_HIGH_IDX] = 0;
+	*((struct nes_hw_cqp **)&cqp_wqe->wqe_words[NES_CQP_WQE_COMP_CTX_LOW_IDX]) =
+			&nesdev->cqp;
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_LOW_IDX] = 0;
+	cqp_wqe->wqe_words[NES_CQP_WQE_COMP_SCRATCH_HIGH_IDX] = 0;
+
+	/* 64-bit QP context address, split into two little-endian words */
+	u64temp = (u64)nesqp->nesqp_context_pbase;
+	cqp_wqe->wqe_words[NES_CQP_QP_WQE_CONTEXT_LOW_IDX] = cpu_to_le32((u32)u64temp);
+	cqp_wqe->wqe_words[NES_CQP_QP_WQE_CONTEXT_HIGH_IDX] =
+			cpu_to_le32((u32)(u64temp >> 32));
+
+	nes_post_cqp_request(nesdev, cqp_request, NES_CQP_REQUEST_HOLDING_LOCK,
+			NES_CQP_REQUEST_RING_DOORBELL);
+
+	spin_unlock_irqrestore(&nesdev->cqp.lock, flags);
+
+free_qp:
+	kfree(nesqp->allocated_buffer);
+}
+
+
+/**
+ * nes_get_qp - look up a QP by number
+ * @device: verbs device the QP belongs to
+ * @qpn: QP number to look up
+ *
+ * Returns the ib_qp registered under @qpn, or NULL when @qpn is outside
+ * [NES_FIRST_QPN, NES_FIRST_QPN + max_qp) or the table slot is empty.
+ * Slots are reset to NULL by nes_rem_ref() when a QP is destroyed, so the
+ * slot is checked before the address of its ibqp member is taken.
+ */
+struct ib_qp *nes_get_qp(struct ib_device *device, int qpn)
+{
+	struct nes_vnic *nesvnic = to_nesvnic(device);
+	struct nes_device *nesdev = nesvnic->nesdev;
+	struct nes_adapter *nesadapter = nesdev->nesadapter;
+
+	if ((qpn < NES_FIRST_QPN) || (qpn >= (NES_FIRST_QPN + nesadapter->max_qp)))
+		return NULL;
+
+	if (!nesadapter->qp_table[qpn - NES_FIRST_QPN])
+		return NULL;
+
+	return &nesadapter->qp_table[qpn - NES_FIRST_QPN]->ibqp;
+}
+
+
+/**
+ * nes_print_macaddr - trace a netdev's MAC address and IRQ
+ * @netdev: device whose name, dev_addr[0..5] and irq are printed
+ */
+static void nes_print_macaddr(struct net_device *netdev)
+{
+	dprintk("%s: MAC %02X:%02X:%02X:%02X:%02X:%02X, IRQ %u\n",
+			netdev->name,
+			netdev->dev_addr[0],
+			netdev->dev_addr[1],
+			netdev->dev_addr[2],
+			netdev->dev_addr[3],
+			netdev->dev_addr[4],
+			netdev->dev_addr[5],
+			netdev->irq);
+}
+
+
+/**
+ * nes_interrupt - handle interrupts
+ * @irq: interrupt number (not referenced in the body)
+ * @dev_id: the struct nes_device that was passed to request_irq()
+ *
+ * Decides whether the interrupt is ours and, if it is, defers the work:
+ * with NES_NAPI defined nes_napi_isr() is tried first and the dpc tasklet is
+ * scheduled only when it returns 0; without NES_NAPI the tasklet is always
+ * scheduled.
+ *
+ * With MSI enabled the interrupt is claimed unconditionally.  Otherwise the
+ * default build claims it when NES_INT_PENDING reads non-zero, while a
+ * NES_LEGACY_INT_DETECT build compares the status registers against the
+ * sources recorded in nesdev (int_req, timer_int_req, intf_int_req).
+ *
+ * Returns IRQ_HANDLED when the interrupt was claimed, IRQ_NONE otherwise.
+ */
+static irqreturn_t nes_interrupt(int irq, void *dev_id)
+{
+	struct nes_device *nesdev = (struct nes_device *)dev_id;
+	int handled = 0;
+#ifdef NES_LEGACY_INT_DETECT
+	u32 int_mask;
+	u32 int_req;
+	u32 int_stat;
+	u32 intf_int_stat;
+	u32 timer_stat;
+#endif
+
+	if (nesdev->msi_enabled) {
+		/* No need to read the interrupt pending register if msi is enabled */
+		handled = 1;
+	} else {
+#ifdef NES_LEGACY_INT_DETECT
+		/* Master interrupt enable provides synchronization for kicking off bottom half
+		  when interrupt sharing is going on */
+		int_mask = nes_read32(nesdev->regs + NES_INT_MASK);
+		if (int_mask & 0x80000000) {	/* only look further while bit 31 of the mask register is set */
+			/* Check interrupt status to see if this might be ours */
+			int_stat = nes_read32(nesdev->regs + NES_INT_STAT);
+			int_req = nesdev->int_req;
+			if (int_stat&int_req) {
+				/* if interesting CEQ or AEQ is pending, claim the interrupt */
+				if ((int_stat&int_req) & (~(NES_INT_TIMER|NES_INT_INTF))) {
+					/* dprintk("%s: Handling CEQ/AEQ/MAC Interrupt, int_req = 0x%08X\n",
+							__FUNCTION__, int_req); */
+					handled = 1;
+				} else {
+					if (((int_stat & int_req) & NES_INT_TIMER) == NES_INT_TIMER) {
+						/* Timer might be running but might be for another function */
+						timer_stat = nes_read32(nesdev->regs + NES_TIMER_STAT);
+						if ((timer_stat & nesdev->timer_int_req) != 0) {
+							/* dprintk("%s: Handling Timer Interrupt\n", __FUNCTION__); */
+							handled = 1;
+						}
+					}
+					if ((((int_stat & int_req) & NES_INT_INTF) == NES_INT_INTF) &&
+							(0 == handled)) {
+						intf_int_stat = nes_read32(nesdev->regs+NES_INTF_INT_STAT);
+						if ((intf_int_stat & nesdev->intf_int_req) != 0) {
+							/* dprintk("%s: Handling an Interface Interrupt\n", __FUNCTION__); */
+							handled = 1;
+						}
+					}
+				}
+				if (handled) {
+					nes_write32(nesdev->regs+NES_INT_MASK, int_mask & (~0x80000000));	/* clear bit 31 so the check above fails until it is set again */
+					int_mask = nes_read32(nesdev->regs+NES_INT_MASK);	/* NOTE(review): value is not used afterwards; presumably a read-back to flush the write - confirm */
+					/* Save off the status to save an additional read */
+					nesdev->int_stat = int_stat;
+					nesdev->napi_isr_ran = 1;
+				}
+			}
+		}
+#else
+		handled = nes_read32(nesdev->regs+NES_INT_PENDING);	/* any non-zero value claims the interrupt */
+		/* dprintk("Interrupt Pending value  = 0x%08X\n", handled ); */
+#endif
+	}
+
+	if (handled) {
+#ifdef NES_NAPI
+		if (0 == nes_napi_isr(nesdev)) {
+#endif
+			tasklet_schedule(&nesdev->dpc_tasklet);
+#ifdef NES_NAPI
+		}
+#endif
+		return(IRQ_HANDLED);
+	} else {
+		/* dprintk("%s[%u] Returning IRQ_NONE\n", __FUNCTION__, __LINE__); */
+		return(IRQ_NONE);
+	}
+}
+
+
+/**
+ * nes_probe - Device initialization
+ * @pcidev: PCI function being bound to the driver
+ * @ent: entry of nes_pci_table that matched (only sanity-checked)
+ *
+ * Enables the PCI function, claims and maps BAR0, optionally enables MSI,
+ * initializes the adapter and its Control QP, programs the interrupt masks,
+ * installs the interrupt handler, registers the inetaddr/netevent notifiers
+ * (if not registered yet) and creates and registers the network device.
+ *
+ * Returns 0 on success or a negative errno.  On failure every resource
+ * acquired up to the failing step is released, in reverse order, through
+ * the bail labels at the end of the function.
+ */
+static int __devinit nes_probe(struct pci_dev *pcidev, const struct pci_device_id *ent)
+{
+	struct net_device *netdev = NULL;
+	struct nes_device *nesdev = NULL;
+	int ret = 0;
+	int irq_requested = 0;		/* request_irq() succeeded */
+	int notifiers_added = 0;	/* this call registered the notifiers */
+	void __iomem *mmio_regs = NULL;
+	u8 hw_rev;
+
+	assert(pcidev != NULL);
+	assert(ent != NULL);
+
+	printk(KERN_INFO PFX "NetEffect RNIC driver v%s loading. (%s)\n",
+			DRV_VERSION, pci_name(pcidev));
+
+	ret = pci_enable_device(pcidev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to enable PCI device. (%s)\n", pci_name(pcidev));
+		goto bail0;
+	}
+
+	dprintk("BAR0 (@0x%08lX) size = 0x%lX bytes\n",
+			(long unsigned int)pci_resource_start(pcidev, BAR_0),
+			(long unsigned int)pci_resource_len(pcidev, BAR_0));
+	dprintk("BAR1 (@0x%08lX) size = 0x%lX bytes\n",
+			(long unsigned int)pci_resource_start(pcidev, BAR_1),
+			(long unsigned int)pci_resource_len(pcidev, BAR_1));
+
+	/* Make sure PCI base addr are MMIO */
+	if (!(pci_resource_flags(pcidev, BAR_0) & IORESOURCE_MEM) ||
+		!(pci_resource_flags(pcidev, BAR_1) & IORESOURCE_MEM)) {
+		printk(KERN_ERR PFX "PCI regions not an MMIO resource\n");
+		ret = -ENODEV;
+		goto bail1;
+	}
+
+	/* Reserve PCI I/O and memory resources */
+	ret = pci_request_regions(pcidev, DRV_NAME);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to request regions. (%s)\n", pci_name(pcidev));
+		goto bail1;
+	}
+
+	/* Use 64-bit DMA masks only when dma_addr_t can hold such addresses */
+	if ((sizeof(dma_addr_t) > 4)) {
+		ret = pci_set_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "64b DMA mask configuration failed\n");
+			goto bail2;
+		}
+		ret = pci_set_consistent_dma_mask(pcidev, DMA_64BIT_MASK);
+		if (ret) {
+			printk(KERN_ERR PFX "64b DMA consistent mask configuration failed\n");
+			goto bail2;
+		}
+	} else {
+		ret = pci_set_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret < 0) {
+			printk(KERN_ERR PFX "32b DMA mask configuration failed\n");
+			goto bail2;
+		}
+		ret = pci_set_consistent_dma_mask(pcidev, DMA_32BIT_MASK);
+		if (ret) {
+			printk(KERN_ERR PFX "32b DMA consistent mask configuration failed\n");
+			goto bail2;
+		}
+	}
+
+	pci_set_master(pcidev);
+
+	/* Allocate (zeroed) hardware structure */
+	nesdev = kzalloc(sizeof(*nesdev), GFP_KERNEL);
+	if (!nesdev) {
+		printk(KERN_ERR PFX "%s: Unable to alloc hardware struct\n", pci_name(pcidev));
+		ret = -ENOMEM;
+		goto bail2;
+	}
+
+	dprintk("%s: Allocated nes device at %p\n", __FUNCTION__, nesdev);
+	nesdev->pcidev = pcidev;
+	pci_set_drvdata(pcidev, nesdev);
+
+	/* offset 0x08 of PCI config space is the revision ID */
+	pci_read_config_byte(pcidev, 0x0008, &hw_rev);
+	dprintk("hw_rev=%u\n", hw_rev);
+
+	spin_lock_init(&nesdev->indexed_regs_lock);
+
+	/*
+	 * Remap the PCI registers in adapter BAR0 to kernel VA space.  The whole
+	 * BAR is mapped: the length must be the size of the register window,
+	 * not the size of the pointer that receives the mapping.
+	 */
+	mmio_regs = ioremap_nocache(pci_resource_start(pcidev, BAR_0),
+			pci_resource_len(pcidev, BAR_0));
+	if (!mmio_regs) {
+		printk(KERN_ERR PFX "Unable to remap BAR0\n");
+		ret = -EIO;
+		goto bail3;
+	}
+	nesdev->regs = mmio_regs;
+	nesdev->index_reg = 0x50 + (PCI_FUNC(pcidev->devfn)*8) + mmio_regs;
+
+	/* Ensure interrupts are disabled */
+	nes_write32(nesdev->regs+NES_INT_MASK, 0x7fffffff);
+
+#ifdef CONFIG_PCI_MSI
+	if (nes_drv_opt & NES_DRV_OPT_ENABLE_MSI) {
+		if (!pci_enable_msi(nesdev->pcidev)) {
+			nesdev->msi_enabled = 1;
+			dprintk("%s: MSI is enabled for device %s\n",
+					__FUNCTION__, pci_name(pcidev));
+		} else {
+			dprintk("%s: MSI is disabled by linux for device %s\n",
+					__FUNCTION__, pci_name(pcidev));
+		}
+	} else {
+		dprintk("%s: MSI not requested due to driver options for device %s\n",
+				__FUNCTION__, pci_name(pcidev));
+	}
+#else
+	dprintk("%s: MSI not supported by this kernel for device %s\n",
+			__FUNCTION__, pci_name(pcidev));
+#endif
+
+	nesdev->et_rx_coalesce_usecs_irq = interrupt_mod_interval;
+	nesdev->csr_start = pci_resource_start(nesdev->pcidev, BAR_0);
+	nesdev->doorbell_start = pci_resource_start(nesdev->pcidev, BAR_1);
+
+	/* Init the adapter */
+	nesdev->nesadapter = nes_init_adapter(nesdev, hw_rev);
+	if (!nesdev->nesadapter) {
+		printk(KERN_ERR PFX "Unable to initialize adapter.\n" );
+		ret = -ENOMEM;
+		goto bail5;
+	}
+
+	nesdev->mac_index = PCI_FUNC(nesdev->pcidev->devfn)%nesdev->nesadapter->port_count;
+	tasklet_init(&nesdev->dpc_tasklet, nes_dpc, (unsigned long)nesdev);
+
+	/* bring up the Control QP */
+	if (nes_init_cqp(nesdev)) {
+		ret = -ENODEV;
+		goto bail6;
+	}
+
+	/* Arm the CCQ */
+	nes_write32(nesdev->regs+NES_CQE_ALLOC, NES_CQE_ALLOC_NOTIFY_NEXT |
+			PCI_FUNC(nesdev->pcidev->devfn));
+	nes_read32(nesdev->regs+NES_CQE_ALLOC);
+
+	/* Enable the interrupts */
+	nesdev->int_req = (0x101 << PCI_FUNC(nesdev->pcidev->devfn)) |
+			(1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+	if (PCI_FUNC(nesdev->pcidev->devfn) < 4) {
+		nesdev->int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+24));
+	}
+
+	/* TODO: This really should be the first driver to load, not function 0 */
+	if (0 == PCI_FUNC(nesdev->pcidev->devfn)) {
+		/* pick up PCI and critical errors if the first driver to load */
+		nesdev->intf_int_req = NES_INTF_INT_PCIERR | NES_INTF_INT_CRITERR;
+		nesdev->int_req |= NES_INT_INTF;
+	} else {
+		nesdev->intf_int_req = 0;
+	}
+	nesdev->intf_int_req |= (1 << (PCI_FUNC(nesdev->pcidev->devfn)+16));
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS0, 0);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS1, 0);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS2, 0x00001265);
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS4, 0x18021804);
+
+	nes_write_indexed(nesdev, NES_IDX_DEBUG_ERROR_MASKS3, 0x17801790);
+
+	/* deal with both periodic and one_shot */
+	nesdev->timer_int_req = 0x101 << PCI_FUNC(nesdev->pcidev->devfn);
+	nesdev->nesadapter->timer_int_req |= nesdev->timer_int_req;
+	dprintk("%s: setting int_req for function %u, nesdev = 0x%04X, adapter = 0x%04X\n",
+			__FUNCTION__, PCI_FUNC(nesdev->pcidev->devfn),
+			nesdev->timer_int_req, nesdev->nesadapter->timer_int_req);
+
+	nes_write32(nesdev->regs+NES_INTF_INT_MASK, ~(nesdev->intf_int_req));
+
+	list_add_tail(&nesdev->list, &nes_dev_list);
+	dprintk("%s:%s:%u\n", __FILE__, __FUNCTION__, __LINE__);
+
+	/* Request an interrupt line for the driver */
+	ret = request_irq(pcidev->irq, nes_interrupt, SA_SHIRQ, DRV_NAME, nesdev);
+	if (ret) {
+		printk(KERN_ERR PFX "%s: requested IRQ %u is busy\n", pci_name(pcidev), pcidev->irq);
+		goto bail65;
+	}
+	irq_requested = 1;
+
+	nes_write32(nesdev->regs+NES_INT_MASK, ~nesdev->int_req);
+
+	if (!notifiers_registered) {
+		register_inetaddr_notifier(&nes_inetaddr_notifier);
+		register_netevent_notifier(&nes_net_notifier);
+		notifiers_registered = 1;
+		notifiers_added = 1;
+	}
+
+	/* Initialize network devices */
+	netdev = nes_netdev_init(nesdev, mmio_regs);
+	if (netdev == NULL) {
+		/* ret is still 0 from request_irq(); report a real error */
+		ret = -ENOMEM;
+		goto bail7;
+	}
+
+	/* Register network device */
+	ret = register_netdev(netdev);
+	if (ret) {
+		printk(KERN_ERR PFX "Unable to register netdev, ret = %d\n", ret);
+		nes_netdev_destroy(netdev);
+		goto bail7;
+	}
+
+	netif_stop_queue(netdev);
+	nes_print_macaddr(netdev);
+
+	nesdev->netdev_count++;
+	nesdev->nesadapter->netdev_count++;
+
+	/* informational, not an error */
+	printk(KERN_INFO PFX "%s: NetEffect RNIC driver successfully loaded"
+			" for port %d (%d ports available).\n",
+			pci_name(pcidev), PCI_FUNC(pcidev->devfn),
+			nesdev->nesadapter->port_count);
+	return 0;
+
+	bail7:
+	printk(KERN_ERR PFX "bail7\n");
+	while (nesdev->netdev_count > 0) {
+		nesdev->netdev_count--;
+		nesdev->nesadapter->netdev_count--;
+
+		unregister_netdev(nesdev->netdev[nesdev->netdev_count]);
+		nes_netdev_destroy(nesdev->netdev[nesdev->netdev_count]);
+	}
+
+	dprintk("%s: netdev_count=%d, nesadapter->netdev_count=%d\n",
+			__FUNCTION__, nesdev->netdev_count, nesdev->nesadapter->netdev_count);
+
+	/*
+	 * Only undo the notifier registration if this call performed it;
+	 * an earlier, successfully probed device still depends on it.
+	 */
+	if (notifiers_added) {
+		unregister_netevent_notifier(&nes_net_notifier);
+		unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+		notifiers_registered = 0;
+	}
+
+	bail65:
+	printk(KERN_ERR PFX "bail65\n");
+	/* reached with or without the IRQ: the device is on the list and the
+	   CQP is up in both cases */
+	list_del(&nesdev->list);
+	nes_destroy_cqp(nesdev);
+	if (irq_requested)
+		free_irq(pcidev->irq, nesdev);
+
+	bail6:
+	printk(KERN_ERR PFX "bail6\n");
+	tasklet_kill(&nesdev->dpc_tasklet);
+	/* Deallocate the Adapter Structure */
+	nes_destroy_adapter(nesdev->nesadapter);
+
+	bail5:
+	printk(KERN_ERR PFX "bail5\n");
+#ifdef CONFIG_PCI_MSI
+	/* MSI was enabled right after the ioremap, so undo it on every path
+	   that also undoes the mapping */
+	if (nesdev->msi_enabled) {
+		pci_disable_msi(pcidev);
+	}
+#endif
+	iounmap(nesdev->regs);
+
+	bail3:
+	printk(KERN_ERR PFX "bail3\n");
+	/* do not leave drvdata pointing at freed memory */
+	pci_set_drvdata(pcidev, NULL);
+	kfree(nesdev);
+
+	bail2:
+	pci_release_regions(pcidev);
+
+	bail1:
+	pci_disable_device(pcidev);
+
+	bail0:
+	return ret;
+}
+
+
+/**
+ * nes_remove - unload from kernel
+ * @pcidev: PCI function being unbound; its drvdata is the nes_device
+ *
+ * Tears down what nes_probe() set up: the netdev, the device's entry in
+ * nes_dev_list, the notifiers (only once no nes device is left), the
+ * Control QP, the interrupt handler, the tasklet, the adapter, MSI, the
+ * register mapping and finally the PCI resources.
+ */
+static void __devexit nes_remove(struct pci_dev *pcidev)
+{
+	struct nes_device *nesdev = pci_get_drvdata(pcidev);
+	struct net_device *netdev;
+	int netdev_index=0;
+
+	/* dprintk("%s called.\n", __FUNCTION__); */
+	if (nesdev->netdev_count) {
+		netdev = nesdev->netdev[netdev_index];
+		if (netdev) {
+			netif_stop_queue(netdev);
+			unregister_netdev(netdev);
+			nes_netdev_destroy(netdev);
+
+			nesdev->netdev[netdev_index] = NULL;
+			nesdev->netdev_count--;
+			nesdev->nesadapter->netdev_count--;
+		}
+	}
+
+	list_del(&nesdev->list);
+
+	/*
+	 * The notifiers are shared by every nes device, so they may only go
+	 * away together with the last one; the remaining devices would
+	 * otherwise stop receiving address and neighbour events.
+	 */
+	if (notifiers_registered && list_empty(&nes_dev_list)) {
+		unregister_netevent_notifier(&nes_net_notifier);
+		unregister_inetaddr_notifier(&nes_inetaddr_notifier);
+		notifiers_registered = 0;
+	}
+
+	nes_destroy_cqp(nesdev);
+
+	/*
+	 * Release the IRQ before killing the tasklet and destroying the
+	 * adapter: while the handler is installed it can still schedule the
+	 * tasklet.
+	 */
+	dprintk("nes_remove: calling free_irq.\n");
+	free_irq(pcidev->irq, nesdev);
+	tasklet_kill(&nesdev->dpc_tasklet);
+
+	dprintk("nes_remove: calling nes_destroy_adapter(%p).\n", nesdev->nesadapter);
+	/* Deallocate the Adapter Structure */
+	nes_destroy_adapter(nesdev->nesadapter);
+
+#ifdef CONFIG_PCI_MSI
+	if (nesdev->msi_enabled) {
+		pci_disable_msi(pcidev);
+	}
+#endif
+
+	dprintk("nes_remove: calling iounmap.\n");
+	iounmap(nesdev->regs);
+
+	kfree(nesdev);
+
+	/* dprintk("nes_remove: calling pci_release_regions.\n"); */
+	pci_release_regions(pcidev);
+
+	/* dprintk("nes_remove: calling pci_disable_device.\n"); */
+	pci_disable_device(pcidev);
+
+	dprintk("nes_remove: calling pci_set_drvdata.\n");
+	pci_set_drvdata(pcidev, NULL);
+}
+
+
+static struct pci_driver nes_pci_driver = {	/* registered in nes_init_module, unregistered in nes_exit_module */
+	.name = DRV_NAME,
+	.id_table = nes_pci_table,
+	.probe = nes_probe,
+	.remove = __devexit_p(nes_remove),
+};
+
+
+/**
+ * nes_init_module - module initialization entry point
+ *
+ * Starts the connection manager and registers the PCI driver.  If the
+ * registration fails the connection manager is stopped again, because the
+ * module exit handler is not run after a failed init.
+ *
+ * Returns the result of pci_module_init().
+ */
+static int __init nes_init_module(void)
+{
+	int ret;
+
+	nes_cm_start();
+
+	ret = pci_module_init(&nes_pci_driver);
+	if (ret < 0)
+		nes_cm_stop();
+
+	return ret;
+}
+
+
+/**
+ * nes_exit_module - module unload entry point
+ *
+ * Stops the connection manager, then unregisters the PCI driver.
+ *
+ * NOTE(review): this is not the reverse of nes_init_module(), which starts
+ * the CM before registering the driver; here the CM is stopped before the
+ * driver is unregistered.  Confirm that running the device remove callback
+ * after nes_cm_stop() is intended.
+ */
+static void __exit nes_exit_module(void)
+{
+	nes_cm_stop();
+	pci_unregister_driver(&nes_pci_driver);
+}
+
+
+module_init(nes_init_module);
+module_exit(nes_exit_module);
+

^ permalink raw reply

* Re: [PATCH 3/14] nes: connection manager routines
From: Jeff Garzik @ 2007-08-08  1:46 UTC (permalink / raw)
  To: ggrundstrom; +Cc: rdreier, ewg, netdev
In-Reply-To: <200708080050.l780oGxo004694@neteffect.com>

ggrundstrom@neteffect.com wrote:
> +atomic_t cm_connects;
> +atomic_t cm_accepts;
> +atomic_t cm_disconnects;
> +atomic_t cm_closes;
> +atomic_t cm_connecteds;
> +atomic_t cm_connect_reqs;
> +atomic_t cm_rejects;

do you really want to take the hit of a LOCK prefix each time you 
increment a stat???


> +static struct nes_cm_event *create_event(struct nes_cm_node *node_p,
> +		enum nes_cm_event_type type)
> +{
> +	struct nes_cm_event *event_p;
> +
> +	if(!node_p->cm_id)
> +		return NULL;
> +
> +	/* allocate an empty event */
> +	event_p = (struct nes_cm_event *)kzalloc(sizeof(struct nes_cm_event),
> +			GFP_ATOMIC);

kill pointless cast from void*

> +	if (!event_p)
> +		return(NULL);

return is not a function.  remove the parens.


> +	event_p->type = type;
> +	event_p->node_p = node_p;
> +	event_p->cm_info.rem_addr = node_p->rem_addr;
> +	event_p->cm_info.loc_addr = node_p->loc_addr;
> +	event_p->cm_info.rem_port = node_p->rem_port;
> +	event_p->cm_info.loc_port = node_p->loc_port;
> +	event_p->cm_info.cm_id = node_p->cm_id;
> +
> +	dprintk("%s[%u] Created event_p=%p, type=%u, dst_addr=%08x[%x], src_addr=%08x[%x]\n",
> +			__FUNCTION__, __LINE__, event_p, type,
> +			event_p->cm_info.loc_addr, event_p->cm_info.loc_port,
> +			event_p->cm_info.rem_addr, event_p->cm_info.rem_port);

these dprintk's make the code far more unreadable than it should be. 
these should be cleaned up.


> +	nes_cm_post_event(event_p);
> +	return(event_p);

return is not a function, remove parens


> +int send_mpa_request(struct nes_cm_node *node_p)
> +{
> +	struct sk_buff *skb_p;
> +	int ret;
> +
> +	skb_p = get_free_pkt(node_p);
> +	if (!skb_p) {
> +		dprintk("%s:%s[%u] -- Failed to get a Free pkt\n",
> +				__FILE__, __FUNCTION__, __LINE__);
> +		return (-1);
> +	}
> +
> +	/* send an MPA Request frame */
> +	form_cm_frame(skb_p, node_p, NULL, 0, &node_p->mpa_frame_p,
> +			node_p->mpa_frame_size, SET_ACK);
> +
> +	ret = schedule_nes_timer(node_p, skb_p, NES_TIMER_TYPE_SEND, 1);
> +	if (ret < 0) {
> +		return (ret);
> +	}

remove braces around single C statements


> +	dprintk("%s[%u] -- \n", __FUNCTION__, __LINE__);
> +	return (0);

* remove all the "_p" suffixes.  the kernel is not the place to start 
approaching Hungarian notation

* return is not a function


> + * recv_mpa - process a received TCP pkt, we are expecting an
> + * IETF MPA frame
> + */
> +static int parse_mpa(struct nes_cm_node *node_p, u8 *buffer, u32 len)

'buffer' should be void* not u8*


> +{
> +	struct ietf_mpa_frame *mpa_frame_p;
> +
> +	dprintk("%s[%u] Enter, node_p=%p\n", __FUNCTION__, __LINE__, node_p);
> +	nes_dump_mem(buffer, len);
> +
> +	/* assume req frame is in tcp data payload */
> +	if (len < sizeof(struct ietf_mpa_frame)) {
> +		dprintk("The received ietf buffer was too small (%x)\n", len);
> +		return (-1);

return is not a function


> +	}
> +
> +	mpa_frame_p = (struct ietf_mpa_frame *)buffer;

kill pointless cast, once 'buffer' type is fixed


> +	node_p->mpa_frame_size = (u32)ntohs(mpa_frame_p->priv_data_len);

kill pointless cast


> +	if (node_p->mpa_frame_size + sizeof(struct ietf_mpa_frame) != len) {
> +		dprintk("The received ietf buffer was not right complete (%x + %x != %x)\n",
> +				node_p->mpa_frame_size, (u32)sizeof(struct ietf_mpa_frame), len);
> +		return (-1);
> +	}
> +
> +	dprintk("%s[%u] -- recvd MPA Frame - with private data len = %u\n",
> +			__FILE__, __LINE__, node_p->mpa_frame_size);
> +
> +	/* copy entire MPA frame to our node's frame */
> +	memcpy(node_p->mpa_frame_b, buffer + sizeof(struct ietf_mpa_frame),
> +			node_p->mpa_frame_size);
> +	nes_dump_mem(&node_p->mpa_frame_p, node_p->mpa_frame_size);
> +	dprintk("%s:%s[%u] -- Exit\n", __FILE__, __FUNCTION__, __LINE__);
> +
> +	return(0);
> +}

return is not a function


> + * handle_exception_pkt - process an exception packet.
> + * We have been in a TSA state, and we have now received SW
> + * TCP/IP traffic should be a FIN request or IP pkt with options
> + */
> +static int handle_exception_pkt(struct nes_cm_node *node_p,
> +		struct sk_buff *skb_p)
> +{
> +	int ret = 0;
> +	struct tcphdr *tcphdr_p = skb_p->h.th;

kill all "_p" suffixes


> +	/* first check to see if this a FIN pkt */
> +	if (tcphdr_p->fin) {
> +		/* we need to ACK the FIN request */
> +		send_ack(node_p);
> +
> +		/* check which side we are (client/server) and set next state accordingly */
> +		if (node_p->tcp_cntxt.client)
> +			node_p->state = NES_CM_STATE_CLOSING;
> +		else {
> +			/* we are the server side */
> +			node_p->state = NES_CM_STATE_CLOSE_WAIT;
> +			/* since this is a self contained CM we don't wait for */
> +			/* an APP to close us, just send final FIN immediately */
> +			ret = send_fin(node_p, NULL);
> +			node_p->state = NES_CM_STATE_LAST_ACK;
> +		}
> +	} else {
> +		ret = -EINVAL;

why is this TCP management in this driver?


> + * form_cm_frame - get a free packet and build empty frame Use
> + * node info to build.
> + */
> +struct sk_buff *form_cm_frame(struct sk_buff *skb_p, struct nes_cm_node *node_p,
> +		void *options, u32 optionsize, void *data, u32 datasize, u8 flags)
> +{
> +	struct tcphdr *tcphdr_p;
> +	struct iphdr *iphdr_p;
> +	struct ethhdr *ethhdr_p;
> +	u8 *buf_p;

'buf' should be void* not u8*


> +	u16 packetsize = sizeof(*iphdr_p);

kill suffixes


> +	packetsize += sizeof(*tcphdr_p);
> +	packetsize +=  optionsize + datasize;
> +
> +	memset(skb_p->data, 0x00, ETH_HLEN + sizeof(*iphdr_p) + sizeof(*tcphdr_p));
> +
> +	skb_p->len = 0;

why?  are you reusing or abusing the skb somehow?


> +	buf_p = skb_put(skb_p, packetsize + ETH_HLEN);
> +
> +	ethhdr_p = (struct ethhdr *) buf_p;
> +	buf_p += ETH_HLEN;
> +
> +	iphdr_p = skb_p->nh.iph = (struct iphdr *)buf_p;
> +	buf_p += sizeof(*iphdr_p);
> +
> +	tcphdr_p  = skb_p->h.th = (struct tcphdr *) buf_p;
> +	buf_p += sizeof(*tcphdr_p);
> +
> +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20))
> +	skb_p->ip_summed = CHECKSUM_HW;
> +#else
> +	skb_p->ip_summed = CHECKSUM_PARTIAL;
> +#endif
> +	skb_p->protocol = ntohs(0x800);
> +	skb_p->data_len = 0;
> +	skb_p->mac.raw = skb_p->data;
> +	skb_p->mac_len = ETH_HLEN;
> +
> +	memcpy(ethhdr_p->h_dest, node_p->rem_mac, ETH_ALEN);
> +	memcpy(ethhdr_p->h_source, node_p->loc_mac, ETH_ALEN);
> +	ethhdr_p->h_proto = htons(0x0800);
> +
> +	iphdr_p->version = IPVERSION;
> +	iphdr_p->ihl = 5;		/* 5 * 4Byte words, IP headr len */
> +	iphdr_p->tos = 0;
> +	iphdr_p->tot_len = htons(packetsize);
> +	iphdr_p->id = htons(++node_p->tcp_cntxt.loc_id);
> +	
> +	iphdr_p->frag_off = ntohs(0x4000);
> +	iphdr_p->ttl = 0x40;
> +	iphdr_p->protocol= 0x06;	/* IPPROTO_TCP */
> +
> +	iphdr_p->saddr = htonl(node_p->loc_addr);
> +	iphdr_p->daddr = htonl(node_p->rem_addr);
> +
> +	tcphdr_p->source = htons(node_p->loc_port);
> +	tcphdr_p->dest = htons(node_p->rem_port);
> +	tcphdr_p->seq = htonl(node_p->tcp_cntxt.loc_seq_num);
> +
> +	if (flags & SET_ACK) {
> +		node_p->tcp_cntxt.loc_ack_num = node_p->tcp_cntxt.rcv_nxt;
> +		tcphdr_p->ack_seq = htonl(node_p->tcp_cntxt.loc_ack_num);
> +		tcphdr_p->ack = 1;
> +	} else
> +		tcphdr_p->ack_seq = 0;
> +
> +	if (flags & SET_SYN) {
> +		node_p->tcp_cntxt.loc_seq_num ++;
> +		tcphdr_p->syn = 1;
> +	} else
> +		node_p->tcp_cntxt.loc_seq_num += datasize;	/* data (no headers) */
> +
> +	dprintk("%s[%u] Local seq # now %x\n", __FUNCTION__, __LINE__,
> +			node_p->tcp_cntxt.loc_seq_num);
> +	if (flags & SET_FIN)
> +		tcphdr_p->fin = 1;
> +
> +	if (flags & SET_RST)
> +		tcphdr_p->rst = 1;
> +
> +	tcphdr_p->doff = (u16) ((sizeof(*tcphdr_p) + optionsize + 3)>> 2);
> +	tcphdr_p->window = htons(node_p->tcp_cntxt.rcv_wnd);
> +	tcphdr_p->urg_ptr = 0;
> +	if (optionsize)
> +		memcpy(buf_p, options, optionsize);
> +	buf_p += optionsize;
> +	if (datasize)
> +		memcpy(buf_p, data, datasize);
> +
> +	skb_shinfo(skb_p)->nr_frags = 0;
> +
> +	return(skb_p);

creating TCP packets by hand?


> +static void dump_pkt(struct sk_buff *skb_p)
> +{
> +	u8 *pkt_p;
> +
> +	if (!skb_p)
> +		return;
> +
> +	pkt_p = (u8 *)skb_p->data;
> +	/* dprintk("skb_p->head=%p, data=%p, tail=%p, end=%p,"
> +			"skb_p->len=%u, data_len=%u\n",
> +			skb_p->head, skb_p->data, skb_p->tail, skb_p->end,
> +			skb_p->len, skb_p->data_len);
> +	*/
> +	nes_dump_mem(pkt_p, skb_p->len);
> +
> +	return;
> +}
> +
> +
> +/**
> + * print_core - dump a cm core
> + */
> +static void print_core(struct nes_cm_core *core_p)
> +{
> +	dprintk("---------------------------------------------\n");
> +	dprintk("CM Core  -- (core_p = %p )\n", core_p);
> +	if (!core_p)
> +		return;
> +	dprintk("---------------------------------------------\n");
> +	dprintk("Session ID    : %u \n", atomic_read(&core_p->session_id));
> +
> +	dprintk("State         : %u \n",  core_p->state);
> +
> +	dprintk("Tx Free cnt   : %u \n", skb_queue_len(&core_p->tx_free_list));
> +	dprintk("Listen Nodes  : %u \n", atomic_read(&core_p->listen_node_cnt));
> +	dprintk("Active Nodes  : %u \n", atomic_read(&core_p->node_cnt));
> +
> +	dprintk("core_p        : %p \n",  core_p);
> +
> +	dprintk("-------------- end core ---------------\n");
> +	return;

kill all pointless "return;" at the end of functions


> +int schedule_nes_timer(struct nes_cm_node *node_p, struct sk_buff *skb_p,
> +						enum nes_timer_type type, int send_retrans)
> +{
> +	unsigned long  flags;
> +	struct nes_cm_core *core_p;
> +	struct nes_timer_entry *new_send;
> +	int ret = 0;
> +	u32 was_timer_set;
> +
> +	new_send = kzalloc(sizeof(struct nes_timer_entry), GFP_ATOMIC);
> +	if(!new_send)
> +		return -1;
> +	/* new_send->timetosend = currenttime */
> +	new_send->retrycount = NES_DEFAULT_RETRYS;
> +	new_send->retranscount = NES_DEFAULT_RETRANS;
> +	new_send->skb = skb_p;
> +	new_send->timetosend = jiffies;
> +	new_send->type = type;
> +	new_send->netdev = node_p->netdev_p;
> +	new_send->send_retrans = send_retrans;
> +
> +	if(type == NES_TIMER_TYPE_CLOSE) {
> +		dprintk("Scheduling Close: node_p = %p, new_send = %p.\n", node_p, new_send);
> +		new_send->timetosend += (HZ/2); /* TODO: decide on the correct value here */
> +		spin_lock_irqsave(&node_p->recv_list_lock, flags);
> +		list_add_tail(&new_send->list, &node_p->recv_list);
> +		spin_unlock_irqrestore(&node_p->recv_list_lock, flags);
> +	}
> +
> +	if(type == NES_TIMER_TYPE_SEND)	{
> +		dprintk("Sending Packet %p:\n", new_send);
> +		new_send->seq_num = htonl(skb_p->h.th->seq);
> +		dump_pkt(skb_p);
> +		spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +		list_add_tail(&new_send->list, &node_p->retrans_list);
> +		spin_unlock_irqrestore(&node_p->retrans_list_lock, flags);
> +	}
> +	if(type == NES_TIMER_TYPE_RECV)	{
> +		new_send->seq_num = htonl(skb_p->h.th->seq);
> +		spin_lock_irqsave(&node_p->recv_list_lock, flags);
> +		list_add_tail(&new_send->list, &node_p->recv_list);
> +		spin_unlock_irqrestore(&node_p->recv_list_lock, flags);
> +	}

the lack of 'else' keywords implies that more than one of these 
conditions can occur simultaneously.  If so, don't you think it's quite 
expensive to grab and release all these locks?


> +	core_p = node_p->core_p;
> +
> +	was_timer_set = timer_pending(&core_p->tcp_timer);
> +
> +	if(!was_timer_set || time_before(new_send->timetosend,
> +									core_p->tcp_timer.expires)){
> +		if(was_timer_set) {
> +			del_timer(&core_p->tcp_timer);
> +		}

single C statement: remove braces

> +		core_p->tcp_timer.expires = new_send->timetosend;
> +
> +		add_timer(&core_p->tcp_timer);
> +	}
> +	return(ret);

return not a function


> +/**
> + * nes_cm_timer_tick
> + */
> +void nes_cm_timer_tick(unsigned long pass)

this seems like a poor design.  AFAICS you should be using delayed 
workqueues or something other than a timer.



> +	unsigned long flags, qplockflags;
> +	unsigned long nexttimeout = jiffies + NES_LONG_TIME;
> +	struct iw_cm_id *cm_id;
> +	struct nes_cm_node *node_p;
> +	struct nes_timer_entry *send_entry, *recv_entry;
> +	struct list_head *list_p_core, *list_p_core_temp, *list_p_node_temp, *list_p_node;
> +	struct nes_cm_core *core_p = g_cm_core_p;
> +	struct nes_qp *nesqp;
> +	u32 settimer = 0;
> +	int ret = NETDEV_TX_OK;
> +
> +	list_for_each_safe(list_p_node, list_p_core_temp, &core_p->connected_nodes) {
> +		node_p = container_of(list_p_node, struct nes_cm_node, list);
> +		spin_lock_irqsave(&node_p->recv_list_lock, flags);
> +		list_for_each_safe(list_p_core, list_p_node_temp, &node_p->recv_list) {
> +			recv_entry = container_of(list_p_core, struct nes_timer_entry, list);
> +			if ((time_after(recv_entry->timetosend, jiffies)) &&
> +					(recv_entry->type == NES_TIMER_TYPE_CLOSE)) {
> +				if(nexttimeout > recv_entry->timetosend || !settimer) {
> +					nexttimeout = recv_entry->timetosend;
> +					settimer = 1;
> +				}
> +				continue;
> +			}
> +			list_del(&recv_entry->list);
> +			cm_id = node_p->cm_id;
> +			spin_unlock_irqrestore(&node_p->recv_list_lock, flags);
> +			if(recv_entry->type == NES_TIMER_TYPE_CLOSE) {
> +				nesqp = (struct nes_qp *)recv_entry->skb;
> +				cm_id->rem_ref(cm_id);
> +				spin_lock_irqsave(&nesqp->lock, qplockflags);
> +				if (nesqp->cm_id) {
> +					dprintk("%s: QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
> +							" with something to do!!! ******\n",
> +							__FUNCTION__, nesqp->hwqp.qp_id, cm_id);
> +					nesqp->hw_tcp_state = NES_AEQE_TCP_STATE_CLOSED;
> +					nesqp->last_aeq = NES_AEQE_AEID_RESET_SENT;
> +					nesqp->ibqp_state = IB_QPS_ERR;
> +					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
> +					nes_cm_disconn(nesqp);
> +				} else {
> +					spin_unlock_irqrestore(&nesqp->lock, qplockflags);
> +					dprintk("%s: QP%u: cm_id = %p: ****** HIT A NES_TIMER_TYPE_CLOSE"
> +							" with nothing to do!!! ******\n",
> +							__FUNCTION__, nesqp->hwqp.qp_id, cm_id);
> +					nes_rem_ref(&nesqp->ibqp);
> +				}
> +			}
> +			else if(recv_entry->type == NES_TIMER_TYPE_RECV) {
> +				dprintk("Processing Packet (%p):\n", recv_entry->skb->data);
> +				dump_pkt(recv_entry->skb);
> +				process_packet(node_p, recv_entry->skb, core_p);
> +				dev_kfree_skb_any(recv_entry->skb);
> +			}
> +			kfree(recv_entry);
> +			spin_lock_irqsave(&node_p->recv_list_lock, flags);
> +		}
> +		spin_unlock_irqrestore(&node_p->recv_list_lock, flags);
> +
> +		spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +		list_for_each_safe(list_p_core, list_p_node_temp, &node_p->retrans_list) {
> +			send_entry = container_of(list_p_core, struct nes_timer_entry, list);
> +			if(time_after(send_entry->timetosend, jiffies)) {
> +				if(nexttimeout > send_entry->timetosend || !settimer) {
> +					nexttimeout = send_entry->timetosend;
> +					settimer = 1;
> +				}
> +				continue;
> +			}
> +			list_del(&send_entry->list);
> +			spin_unlock_irqrestore(&node_p->retrans_list_lock, flags);
> +			if(send_entry->type == NES_TIMER_NODE_CLEANUP){
> +				dprintk("!send - %p-> next/prev=%p,%p, tts=%lx, skb=%p, type=%x,"
> +						" retry=%x, retrans=%x, context=%x, seq=%x\n",
> +						send_entry, send_entry->list.next, send_entry->list.prev,
> +						send_entry->timetosend, send_entry->skb, send_entry->type,
> +						send_entry->retrycount, send_entry->retranscount,
> +						send_entry->context, send_entry->seq_num);
> +				spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +				continue;
> +			}
> +			if(send_entry->seq_num < node_p->tcp_cntxt.rem_ack_num ||
> +				node_p->accelerated) {
> +					dev_kfree_skb_any(send_entry->skb);
> +					kfree(send_entry);
> +					spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +					continue;
> +			}
> +
> +			if(!send_entry->retranscount || !send_entry->retrycount) {
> +				dev_kfree_skb_any(send_entry->skb);
> +				kfree(send_entry);
> +				create_event(node_p, NES_CM_EVENT_ABORTED);
> +				spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +				continue;
> +			}
> +			atomic_inc(&send_entry->skb->users);
> +			ret = nes_nic_cm_xmit(send_entry->skb, node_p->netdev_p);
> +			if(ret != NETDEV_TX_OK) {
> +				atomic_dec(&send_entry->skb->users);
> +				send_entry->retrycount--;
> +				nexttimeout = jiffies + NES_SHORT_TIME;
> +				settimer = 1;
> +				spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +				list_add(&send_entry->list, &node_p->retrans_list);
> +				break;
> +			}
> +			dprintk("Packet Sent:\n");
> +			dump_pkt(send_entry->skb);
> +			if(send_entry->send_retrans) {
> +				send_entry->retranscount--;
> +				send_entry->timetosend = jiffies + NES_RETRY_TIMEOUT;
> +				if(nexttimeout > send_entry->timetosend || !settimer) {
> +					nexttimeout = send_entry->timetosend;
> +					settimer = 1;
> +				}
> +				spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +				list_add(&send_entry->list, &node_p->retrans_list);
> +				continue;
> +			}
> +			else {
> +				dev_kfree_skb_any(send_entry->skb);
> +				kfree(send_entry);
> +				spin_lock_irqsave(&node_p->retrans_list_lock, flags);
> +				continue;
> +			}
> +		}
> +		spin_unlock_irqrestore(&node_p->retrans_list_lock, flags);
> +
> +		if(ret != NETDEV_TX_OK)
> +			break;
> +	}
> +
> +	if(settimer)
> +	{
> +		if(timer_pending(&core_p->tcp_timer)) {
> +			del_timer(&core_p->tcp_timer);
> +		}
> +		core_p->tcp_timer.expires  = nexttimeout;
> +		add_timer(&core_p->tcp_timer);

are you reinventing mod_timer() ?

I stopped reviewing here.  Please make all the style changes so that we 
can review your driver in depth.

All the debug-print code makes the driver highly unreadable.  Most of 
that should go away now that you are moving towards a kernel submission.

	Jeff



^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox