[PATCH] Extend lock less TX to real devices

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Andi Kleen <ak@muc.de>
To: davem@redhat.com, kuznet@ms2.inr.ac.ru, netdev@oss.sgi.com,
	akepner@sgi.com
Subject: [PATCH] Extend lock less TX to real devices
Date: Tue, 31 Aug 2004 14:38:20 +0200	[thread overview]
Message-ID: <m38ybvjxdv.fsf@averell.firstfloor.org> (raw)


This patch extends the recently added NETIF_F_LLTX to real devices.
A lot of modern network drivers have a single big lock around their 
hard_start_xmit, and it doesn't make sense to take the xmit lock too.
They also have enough locking to handle setting of multicast lists
properly.

Now they can set NETIF_F_LLTX and get one lock less.  For drivers
that don't set this flag nothing changes.

I added support for trylocking instead of spinning like sch_generic
does - for that the driver has to return -1, then the packet is requeued.
The check for a local device deadlock is lost for this case, 
but that doesn't seem to be a big loss (I've never seen this printk 
ever get triggered)

The patch looks bigger than it really is because i moved some code
around and converted the macros into inlines. 

I also did an additional micro optimization: on a preemptive kernel
dev_queue_xmit would change the local atomic count several times
in the hot path. This patch changes it to disable preemption only once,
to hopefully make it a bit faster.

I changed the lltx handling to not change xmit_lock_owner.
Without a lock it is not safe anyways and it will prevent
another cache line from bouncing.

With only this patch nothing changes because no driver uses
the new flag yet.

-Andi

diff -u linux-2.6.8-work/net/core/dev.c-o linux-2.6.8-work/net/core/dev.c
--- linux-2.6.8-work/net/core/dev.c-o	2004-08-05 04:31:12.000000000 +0200
+++ linux-2.6.8-work/net/core/dev.c	2004-08-31 13:25:35.000000000 +0200
@@ -1255,19 +1255,24 @@
 	return 0;
 }
 
-#define HARD_TX_LOCK_BH(dev, cpu) {			\
-	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		spin_lock_bh(&dev->xmit_lock);		\
-		dev->xmit_lock_owner = cpu;		\
-	}						\
-}
+/* Preemption must be off here. A driver setting LLTX has to 
+   use interrupt safe locking. */
 
-#define HARD_TX_UNLOCK_BH(dev) {			\
-	if ((dev->features & NETIF_F_LLTX) == 0) {	\
-		dev->xmit_lock_owner = -1;		\
-		spin_unlock_bh(&dev->xmit_lock);	\
-	}						\
-}
+static inline int hard_tx_lock(struct net_device *dev) 
+{ 
+	if (dev->features & NETIF_F_LLTX)
+		return 1; 
+	spin_lock(&dev->xmit_lock);
+	dev->xmit_lock_owner = smp_processor_id(); 
+} 
+
+static inline void hard_tx_unlock(struct net_device *dev) 
+{ 
+	if (dev->features & NETIF_F_LLTX) 
+		return; 
+	dev->xmit_lock_owner = -1;
+	spin_unlock(&dev->xmit_lock);	
+} 
 
 static inline void qdisc_run(struct net_device *dev)
 {
@@ -1319,7 +1324,15 @@
 	      	if (skb_checksum_help(&skb, 0))
 	      		goto out_kfree_skb;
 
-	rcu_read_lock();
+	/* 
+	 * Various locks need a _bh disable and there are CPU local
+	 * data structures too. Instead of changing the preempt count
+	 * with each lock just grab it here once for the whole
+	 * function. This also gives the require atomicity for the RCU
+	 * use below.
+	 */ 
+	local_bh_disable();
+
 	/* Updates of qdisc are serialized by queue_lock. 
 	 * The struct Qdisc which is pointed to by qdisc is now a 
 	 * rcu structure - it may be accessed without acquiring 
@@ -1339,18 +1352,17 @@
 #endif
 	if (q->enqueue) {
 		/* Grab device queue */
-		spin_lock_bh(&dev->queue_lock);
+		spin_lock(&dev->queue_lock);
 
 		rc = q->enqueue(skb, q);
 
 		qdisc_run(dev);
 
-		spin_unlock_bh(&dev->queue_lock);
-		rcu_read_unlock();
+		spin_unlock(&dev->queue_lock);
 		rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
 		goto out;
 	}
-	rcu_read_unlock();
+
 
 	/* The device has no queue. Common case for software devices:
 	   loopback, all the sorts of tunnels...
@@ -1363,32 +1375,27 @@
 
 	   Check this and shot the lock. It is not prone from deadlocks.
 	   Either shot noqueue qdisc, it is even simpler 8)
+
+	   BHs are still disabled here
 	 */
 	if (dev->flags & IFF_UP) {
-		int cpu = get_cpu();
-
-		if (dev->xmit_lock_owner != cpu) {
-
-			HARD_TX_LOCK_BH(dev, cpu);
-			put_cpu();
-
+		if (hard_tx_lock(dev)) { 
 			if (!netif_queue_stopped(dev)) {
 				if (netdev_nit)
 					dev_queue_xmit_nit(skb, dev);
 
 				rc = 0;
 				if (!dev->hard_start_xmit(skb, dev)) {
-					HARD_TX_UNLOCK_BH(dev);
+					hard_tx_unlock(dev);
 					goto out;
 				}
 			}
-			HARD_TX_UNLOCK_BH(dev);
+			hard_tx_unlock(dev);
 			if (net_ratelimit())
 				printk(KERN_CRIT "Virtual device %s asks to "
 				       "queue packet!\n", dev->name);
 			goto out_enetdown;
 		} else {
-			put_cpu();
 			/* Recursion is detected! It is possible,
 			 * unfortunately */
 			if (net_ratelimit())
@@ -1401,6 +1408,7 @@
 out_kfree_skb:
 	kfree_skb(skb);
 out:
+	local_bh_enable();
 	return rc;
 }
 
diff -u linux-2.6.8-work/net/core/pktgen.c-o linux-2.6.8-work/net/core/pktgen.c
--- linux-2.6.8-work/net/core/pktgen.c-o	2004-08-05 04:31:12.000000000 +0200
+++ linux-2.6.8-work/net/core/pktgen.c	2004-08-15 17:51:22.000000000 +0200
@@ -629,8 +629,9 @@
 		}
 
 		nr_frags = skb_shinfo(skb)->nr_frags;
-		   
-		spin_lock_bh(&odev->xmit_lock);
+
+		if (!(odev->features & NETIF_F_LLTX))
+			spin_lock_bh(&odev->xmit_lock);
 		if (!netif_queue_stopped(odev)) {
 
 			atomic_inc(&skb->users);
@@ -655,8 +656,8 @@
 			last_ok = 0;
 		}
 		
-
-		spin_unlock_bh(&odev->xmit_lock);
+		if (!(odev->features & NETIF_F_LLTX))
+			spin_unlock_bh(&odev->xmit_lock);
 
 		if (info->ipg) {
 			/* Try not to busy-spin if we have larger sleep times.
diff -u linux-2.6.8-work/net/sched/sch_generic.c-o linux-2.6.8-work/net/sched/sch_generic.c
--- linux-2.6.8-work/net/sched/sch_generic.c-o	2004-08-13 03:44:03.000000000 +0200
+++ linux-2.6.8-work/net/sched/sch_generic.c	2004-08-31 13:13:36.000000000 +0200
@@ -68,6 +68,34 @@
 	write_unlock_bh(&qdisc_tree_lock);
 }
 
+static int qdisc_coll(struct net_device *dev, struct sk_buff *skb)
+{
+	/* So, someone grabbed the driver. */
+	
+	/* It may be transient configuration error,
+	   when hard_start_xmit() recurses. We detect
+	   it by checking xmit owner and drop the
+	   packet when deadloop is detected.
+	*/
+	if (dev->xmit_lock_owner == smp_processor_id()) {
+		kfree_skb(skb);
+		if (net_ratelimit())
+			printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
+		return -1;
+	}
+	__get_cpu_var(netdev_rx_stat).cpu_collision++;
+	return 0;
+}
+
+static inline void qdisc_unlock_driver(struct net_device *dev)
+{
+	if (!(dev->features & NETIF_F_LLTX)) { 
+		dev->xmit_lock_owner = -1;
+		spin_unlock(&dev->xmit_lock);
+	}
+	spin_lock(&dev->queue_lock);
+}
+
 /* 
    dev->queue_lock serializes queue accesses for this device
    AND dev->qdisc pointer itself.
@@ -97,50 +125,49 @@
 
 	/* Dequeue packet */
 	if ((skb = q->dequeue(q)) != NULL) {
-		if (spin_trylock(&dev->xmit_lock)) {
+		/* 
+		 * When the driver has LLTX set it does its own locking 
+		 * in start_xmit. No need to add additional overhead by
+		 * locking again. These checks are worth it because
+		 * even uncongested locks can be quite expensive.
+		 */
+		if ((dev->features & NETIF_F_LLTX) == 0) { 
+			if (!spin_trylock(&dev->xmit_lock)) {
+				if (qdisc_coll(dev, skb) < 0) 
+					return -1;
+				goto out; 
+				
+			}
 			/* Remember that the driver is grabbed by us. */
 			dev->xmit_lock_owner = smp_processor_id();
-
-			/* And release queue */
-			spin_unlock(&dev->queue_lock);
-
-			if (!netif_queue_stopped(dev)) {
-				if (netdev_nit)
-					dev_queue_xmit_nit(skb, dev);
-
-				if (dev->hard_start_xmit(skb, dev) == 0) {
-					dev->xmit_lock_owner = -1;
-					spin_unlock(&dev->xmit_lock);
-
-					spin_lock(&dev->queue_lock);
-					return -1;
+		}
+		
+		/* And release queue */
+		spin_unlock(&dev->queue_lock);
+		
+		if (!netif_queue_stopped(dev)) {
+			int ret;
+
+			if (netdev_nit)
+				dev_queue_xmit_nit(skb, dev);
+			
+			ret = dev->hard_start_xmit(skb, dev);
+			if (ret <= 0) { 
+				qdisc_unlock_driver(dev);
+				/* requeue in case of lock collision */
+				if (ret < 0) {
+					__get_cpu_var(netdev_rx_stat).cpu_collision++;
+					goto out; 
 				}
-			}
-
-			/* Release the driver */
-			dev->xmit_lock_owner = -1;
-			spin_unlock(&dev->xmit_lock);
-			spin_lock(&dev->queue_lock);
-			q = dev->qdisc;
-		} else {
-			/* So, someone grabbed the driver. */
-
-			/* It may be transient configuration error,
-			   when hard_start_xmit() recurses. We detect
-			   it by checking xmit owner and drop the
-			   packet when deadloop is detected.
-			 */
-			if (dev->xmit_lock_owner == smp_processor_id()) {
-				kfree_skb(skb);
-				if (net_ratelimit())
-					printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name);
 				return -1;
 			}
-			__get_cpu_var(netdev_rx_stat).cpu_collision++;
 		}
 
+		qdisc_unlock_driver(dev);
+		q = dev->qdisc;
+
 		/* Device kicked us out :(
-		   This is possible in three cases:
+		   This is possible in four cases:
 
 		   0. driver is locked
 		   1. fastroute is enabled
@@ -149,6 +176,7 @@
 		   3. device is buggy (ppp)
 		 */
 
+	out:
 		q->ops->requeue(skb, q);
 		netif_schedule(dev);
 		return 1;

next             reply	other threads:[~2004-08-31 12:38 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-08-31 12:38 Andi Kleen [this message]
2004-09-02  5:33 ` [PATCH] Extend lock less TX to real devices David S. Miller
2004-09-04 13:28   ` Andi Kleen
2004-09-04 14:11     ` jamal
2004-09-04 14:24       ` Andi Kleen
2004-09-04 19:39     ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=m38ybvjxdv.fsf@averell.firstfloor.org \
    --to=ak@muc.de \
    --cc=akepner@sgi.com \
    --cc=davem@redhat.com \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=netdev@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.