public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] e1000 rx buffer allocation
@ 2004-08-26 23:35 shane
  2004-08-27  1:18 ` David S. Miller
  2004-08-27  1:18 ` David S. Miller
  0 siblings, 2 replies; 6+ messages in thread
From: shane @ 2004-08-26 23:35 UTC (permalink / raw)
  To: linux-kernel

[-- Attachment #1: Type: TEXT/PLAIN, Size: 2013 bytes --]

I've written a patch to the e1000 driver.  The patch fixes the memory 
allocation problems reported earlier:

http://seclists.org/lists/linux-kernel/2004/Jan/0043.html

Background: my group is scanning microfilms at high resolution.  We're 
trying to receive 50MB/s from a TCP socket and spit it out to a RAID 0 
array.  It works for a few minutes, but then the e1000 driver starts 
failing to allocate new buffers in the ring, we lose packets, and the 
connection breaks down because we can't accept the data quickly enough. 
I don't know whether we're ever actually running out of rx buffers (we're 
using 4096 buffers, the maximum), but the warning from the kernel is scary 
enough that it's worth fixing.

When e1000 is using jumbo frames (mtu=9000), the driver tries to allocate 
just over 16k for each receive buffer.  Allocating so much memory inside 
an interrupt has a good chance of failing.  So my patch changes the driver 
to schedule rx buffer allocation in a sleep-capable context, using 
schedule_work().  Now I see no more problems with the e1000 driver.  The 
box is a dual Xeon with 2 GB of RAM, but the patch also works fine on my 
laptop.

I wouldn't be surprised if someone else has already fixed this problem, 
but if they have, I haven't found the patch.  To achieve reliably high 
transfer rates with jumbo frames, it just doesn't seem to make sense to 
alloc_skb() in interrupt context.

The patch is against 2.6.8.1.  I can move it to 2.6.9-rc1 if it would 
help.  I also added a printk() to the driver that tells you when 10 or 
more rx buffers have been allocated at once.  The message is useful for 
figuring out the right RxDescriptors option value, but we probably need a 
better way to report this information.

Independently of my patch, I'm a little concerned about what will happen 
if the driver runs out of rx buffers.  Ideally, it will just drop packets, 
but I wonder if the code will panic instead.

BTW, this is my first post on this list.  Please CC me on replies.

Shane

[-- Attachment #2: Type: TEXT/PLAIN, Size: 5082 bytes --]

--- e1000.orig/e1000_main.c	2004-08-14 04:55:10.000000000 -0600
+++ e1000/e1000_main.c	2004-08-26 12:38:30.602191640 -0600
@@ -143,7 +143,9 @@
 #else
 static boolean_t e1000_clean_rx_irq(struct e1000_adapter *adapter);
 #endif
-static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter);
+static void e1000_alloc_rx_buffers(struct e1000_adapter *adapter,
+				   int from_irq);
+static void e1000_alloc_task(struct e1000_adapter *adapter);
 static int e1000_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
 static int e1000_mii_ioctl(struct net_device *netdev, struct ifreq *ifr,
 			   int cmd);
@@ -263,7 +265,7 @@
 	e1000_configure_tx(adapter);
 	e1000_setup_rctl(adapter);
 	e1000_configure_rx(adapter);
-	e1000_alloc_rx_buffers(adapter);
+	e1000_alloc_rx_buffers(adapter, 0);
 
 	if((err = request_irq(adapter->pdev->irq, &e1000_intr,
 		              SA_SHIRQ | SA_SAMPLE_RANDOM,
@@ -286,6 +288,10 @@
 	del_timer_sync(&adapter->tx_fifo_stall_timer);
 	del_timer_sync(&adapter->watchdog_timer);
 	del_timer_sync(&adapter->phy_info_timer);
+	while (adapter->alloc_scheduled) {
+		printk("waiting for e1000_alloc_task...\n");
+		mdelay(5);
+	}
 	adapter->link_speed = 0;
 	adapter->link_duplex = 0;
 	netif_carrier_off(netdev);
@@ -530,6 +536,9 @@
 	adapter->phy_info_timer.function = &e1000_update_phy_info;
 	adapter->phy_info_timer.data = (unsigned long) adapter;
 
+	INIT_WORK(&adapter->alloc_task,
+		  (void (*)(void *))e1000_alloc_task, adapter);
+
 	INIT_WORK(&adapter->tx_timeout_task,
 		(void (*)(void *))e1000_tx_timeout_task, netdev);
 
@@ -1203,7 +1212,7 @@
 
 	if(netif_running(netdev)) {
 		e1000_configure_rx(adapter);
-		e1000_alloc_rx_buffers(adapter);
+		e1000_alloc_rx_buffers(adapter, 0);
 	}
 }
 
@@ -2356,7 +2365,7 @@
 
 	rx_ring->next_to_clean = i;
 
-	e1000_alloc_rx_buffers(adapter);
+	e1000_alloc_rx_buffers(adapter, 1);
 
 	return cleaned;
 }
@@ -2364,10 +2373,11 @@
 /**
  * e1000_alloc_rx_buffers - Replace used receive buffers
  * @adapter: address of board private structure
+ * @from_irq: 1 if called from an interrupt, 0 if not
  **/
 
 static void
-e1000_alloc_rx_buffers(struct e1000_adapter *adapter)
+e1000_alloc_rx_buffers(struct e1000_adapter *adapter, int from_irq)
 {
 	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
 	struct net_device *netdev = adapter->netdev;
@@ -2376,17 +2386,21 @@
 	struct e1000_buffer *buffer_info;
 	struct sk_buff *skb;
 	unsigned int i;
+	int need_alloc = 0;
 
 	i = rx_ring->next_to_use;
 	buffer_info = &rx_ring->buffer_info[i];
 
 	while(!buffer_info->skb) {
 		rx_desc = E1000_RX_DESC(*rx_ring, i);
-
-		skb = dev_alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN);
-
+		skb = buffer_info->future_skb;
+		if(!skb && !from_irq) {
+			skb = alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN,
+					GFP_KERNEL);
+		}
 		if(!skb) {
-			/* Better luck next round */
+			/* Wait for the timer */
+			need_alloc = 1;
 			break;
 		}
 
@@ -2399,6 +2413,7 @@
 		skb->dev = netdev;
 
 		buffer_info->skb = skb;
+		buffer_info->future_skb = NULL;
 		buffer_info->length = adapter->rx_buffer_len;
 		buffer_info->dma =
 			pci_map_single(pdev,
@@ -2423,9 +2438,47 @@
 	}
 
 	rx_ring->next_to_use = i;
+	if (need_alloc && !adapter->alloc_scheduled) {
+	   	adapter->alloc_scheduled = 1;
+		schedule_work(&adapter->alloc_task);
+	}
 }
 
 /**
+ * e1000_alloc_task - Allocate skbs to be consumed by e1000_alloc_rx_buffers
+ * @adapter: address of board private structure
+ **/
+
+static void e1000_alloc_task(struct e1000_adapter *adapter) {
+	struct e1000_desc_ring *rx_ring = &adapter->rx_ring;
+	struct e1000_buffer *buffer_info;
+	struct sk_buff *skb;
+	unsigned int i;
+	int count = 0;
+	
+	i = rx_ring->next_to_use;
+	buffer_info = &rx_ring->buffer_info[i];
+
+	while(!buffer_info->skb && !buffer_info->future_skb) {
+		skb = alloc_skb(adapter->rx_buffer_len + NET_IP_ALIGN,
+				GFP_KERNEL);
+		if(!skb) {
+		  /* Better luck next round */
+		  break;
+		}
+		count++;
+		buffer_info->future_skb = skb;
+		if(++i == rx_ring->count) i = 0;
+		buffer_info = &rx_ring->buffer_info[i];
+	}
+	if(count >= 10)
+	  printk("e1000: allocated %d buffers\n", count);
+	adapter->alloc_scheduled = 0;
+}
+
+
+
+/**
  * e1000_smartspeed - Workaround for SmartSpeed on 82541 and 82547 controllers.
  * @adapter:
  **/
--- e1000.orig/e1000.h	2004-08-14 04:54:47.000000000 -0600
+++ e1000/e1000.h	2004-08-26 12:25:04.000000000 -0600
@@ -151,7 +151,7 @@
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer */
 struct e1000_buffer {
-	struct sk_buff *skb;
+	struct sk_buff *skb, *future_skb;
 	uint64_t dma;
 	unsigned long length;
 	unsigned long time_stamp;
@@ -202,6 +202,8 @@
 	spinlock_t stats_lock;
 	atomic_t irq_sem;
 	struct work_struct tx_timeout_task;
+	struct work_struct alloc_task;
+	int alloc_scheduled;
     	uint8_t fc_autoneg;
 
 	struct timer_list blink_timer;

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2004-08-28  2:27 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-08-26 23:35 [PATCH] e1000 rx buffer allocation shane
2004-08-27  1:18 ` David S. Miller
2004-08-27  1:18 ` David S. Miller
2004-08-27 18:36   ` Chris Leech
2004-08-27 19:36     ` David S. Miller
2004-08-28  2:27     ` Shane Hathaway

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox