[PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
@ 2010-10-08 14:25 Stanislaw Gruszka
  2010-10-08 14:25 ` [PATCH 2/2] r8169: use device model DMA API Stanislaw Gruszka
                   ` (2 more replies)
  0 siblings, 3 replies; 18+ messages in thread
From: Stanislaw Gruszka @ 2010-10-08 14:25 UTC (permalink / raw)
  To: Francois Romieu, netdev; +Cc: Stanislaw Gruszka

We have a Fedora bug report where the driver fails to initialize after
suspend/resume because of memory allocation errors:
https://bugzilla.redhat.com/show_bug.cgi?id=629158

To fix this, use GFP_KERNEL allocations where possible.

Tested-by: Neal Becker <ndbecker2@gmail.com>
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/r8169.c |   12 ++++++------
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index fe3b762..a7fb044 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4006,7 +4006,7 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
 static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev,
 					    struct net_device *dev,
 					    struct RxDesc *desc, int rx_buf_sz,
-					    unsigned int align)
+					    unsigned int align, gfp_t gfp)
 {
 	struct sk_buff *skb;
 	dma_addr_t mapping;
@@ -4014,7 +4014,7 @@ static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev,
 
 	pad = align ? align : NET_IP_ALIGN;
 
-	skb = netdev_alloc_skb(dev, rx_buf_sz + pad);
+	skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp);
 	if (!skb)
 		goto err_out;
 
@@ -4045,7 +4045,7 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
 }
 
 static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev,
-			   u32 start, u32 end)
+			   u32 start, u32 end, gfp_t gfp)
 {
 	u32 cur;
 
@@ -4060,7 +4060,7 @@ static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev,
 
 		skb = rtl8169_alloc_rx_skb(tp->pci_dev, dev,
 					   tp->RxDescArray + i,
-					   tp->rx_buf_sz, tp->align);
+					   tp->rx_buf_sz, tp->align, gfp);
 		if (!skb)
 			break;
 
@@ -4088,7 +4088,7 @@ static int rtl8169_init_ring(struct net_device *dev)
 	memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info));
 	memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *));
 
-	if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC) != NUM_RX_DESC)
+	if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC, GFP_KERNEL) != NUM_RX_DESC)
 		goto err_out;
 
 	rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
@@ -4587,7 +4587,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 	count = cur_rx - tp->cur_rx;
 	tp->cur_rx = cur_rx;
 
-	delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx);
+	delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx, GFP_ATOMIC);
 	if (!delta && count)
 		netif_info(tp, intr, dev, "no Rx buffer allocated\n");
 	tp->dirty_rx += delta;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* [PATCH 2/2] r8169: use device model DMA API
  2010-10-08 14:25 [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
@ 2010-10-08 14:25 ` Stanislaw Gruszka
  2010-10-09  7:57   ` Eric Dumazet
  2010-10-08 14:52 ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
  2010-10-09  7:54 ` Eric Dumazet
  2 siblings, 1 reply; 18+ messages in thread
From: Stanislaw Gruszka @ 2010-10-08 14:25 UTC (permalink / raw)
  To: Francois Romieu, netdev; +Cc: Stanislaw Gruszka

Use the DMA API, as the PCI equivalents will be deprecated. This change also
allows allocating with GFP_KERNEL where possible.

Tested-by: Neal Becker <ndbecker2@gmail.com>
Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
---
 drivers/net/r8169.c |   53 +++++++++++++++++++++++++++-----------------------
 1 files changed, 29 insertions(+), 24 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index a7fb044..bc669a4 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -1217,7 +1217,8 @@ static void rtl8169_update_counters(struct net_device *dev)
 	if ((RTL_R8(ChipCmd) & CmdRxEnb) == 0)
 		return;
 
-	counters = pci_alloc_consistent(tp->pci_dev, sizeof(*counters), &paddr);
+	counters = dma_alloc_coherent(&tp->pci_dev->dev, sizeof(*counters),
+				      &paddr, GFP_KERNEL);
 	if (!counters)
 		return;
 
@@ -1238,7 +1239,8 @@ static void rtl8169_update_counters(struct net_device *dev)
 	RTL_W32(CounterAddrLow, 0);
 	RTL_W32(CounterAddrHigh, 0);
 
-	pci_free_consistent(tp->pci_dev, sizeof(*counters), counters, paddr);
+	dma_free_coherent(&tp->pci_dev->dev, sizeof(*counters), counters,
+			  paddr);
 }
 
 static void rtl8169_get_ethtool_stats(struct net_device *dev,
@@ -3298,15 +3300,15 @@ static int rtl8169_open(struct net_device *dev)
 
 	/*
 	 * Rx and Tx desscriptors needs 256 bytes alignment.
-	 * pci_alloc_consistent provides more.
+	 * dma_alloc_coherent provides more.
 	 */
-	tp->TxDescArray = pci_alloc_consistent(pdev, R8169_TX_RING_BYTES,
-					       &tp->TxPhyAddr);
+	tp->TxDescArray = dma_alloc_coherent(&pdev->dev, R8169_TX_RING_BYTES,
+					     &tp->TxPhyAddr, GFP_KERNEL);
 	if (!tp->TxDescArray)
 		goto err_pm_runtime_put;
 
-	tp->RxDescArray = pci_alloc_consistent(pdev, R8169_RX_RING_BYTES,
-					       &tp->RxPhyAddr);
+	tp->RxDescArray = dma_alloc_coherent(&pdev->dev, R8169_RX_RING_BYTES,
+					     &tp->RxPhyAddr, GFP_KERNEL);
 	if (!tp->RxDescArray)
 		goto err_free_tx_0;
 
@@ -3340,12 +3342,12 @@ out:
 err_release_ring_2:
 	rtl8169_rx_clear(tp);
 err_free_rx_1:
-	pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			    tp->RxPhyAddr);
+	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
+			  tp->RxPhyAddr);
 	tp->RxDescArray = NULL;
 err_free_tx_0:
-	pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray,
-			    tp->TxPhyAddr);
+	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
+			  tp->TxPhyAddr);
 	tp->TxDescArray = NULL;
 err_pm_runtime_put:
 	pm_runtime_put_noidle(&pdev->dev);
@@ -3981,7 +3983,7 @@ static void rtl8169_free_rx_skb(struct rtl8169_private *tp,
 {
 	struct pci_dev *pdev = tp->pci_dev;
 
-	pci_unmap_single(pdev, le64_to_cpu(desc->addr), tp->rx_buf_sz,
+	dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), tp->rx_buf_sz,
 			 PCI_DMA_FROMDEVICE);
 	dev_kfree_skb(*sk_buff);
 	*sk_buff = NULL;
@@ -4020,7 +4022,7 @@ static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev,
 
 	skb_reserve(skb, align ? ((pad - 1) & (unsigned long)skb->data) : pad);
 
-	mapping = pci_map_single(pdev, skb->data, rx_buf_sz,
+	mapping = dma_map_single(&pdev->dev, skb->data, rx_buf_sz,
 				 PCI_DMA_FROMDEVICE);
 
 	rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
@@ -4105,7 +4107,8 @@ static void rtl8169_unmap_tx_skb(struct pci_dev *pdev, struct ring_info *tx_skb,
 {
 	unsigned int len = tx_skb->len;
 
-	pci_unmap_single(pdev, le64_to_cpu(desc->addr), len, PCI_DMA_TODEVICE);
+	dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), len,
+			 PCI_DMA_TODEVICE);
 	desc->opts1 = 0x00;
 	desc->opts2 = 0x00;
 	desc->addr = 0x00;
@@ -4249,7 +4252,8 @@ static int rtl8169_xmit_frags(struct rtl8169_private *tp, struct sk_buff *skb,
 		txd = tp->TxDescArray + entry;
 		len = frag->size;
 		addr = ((void *) page_address(frag->page)) + frag->page_offset;
-		mapping = pci_map_single(tp->pci_dev, addr, len, PCI_DMA_TODEVICE);
+		mapping = dma_map_single(&tp->pci_dev->dev, addr, len,
+					 PCI_DMA_TODEVICE);
 
 		/* anti gcc 2.95.3 bugware (sic) */
 		status = opts1 | len | (RingEnd * !((entry + 1) % NUM_TX_DESC));
@@ -4319,7 +4323,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 		tp->tx_skb[entry].skb = skb;
 	}
 
-	mapping = pci_map_single(tp->pci_dev, skb->data, len, PCI_DMA_TODEVICE);
+	mapping = dma_map_single(&tp->pci_dev->dev, skb->data, len,
+				 PCI_DMA_TODEVICE);
 
 	tp->tx_skb[entry].len = len;
 	txd->addr = cpu_to_le64(mapping);
@@ -4482,8 +4487,8 @@ static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff,
 	if (!skb)
 		goto out;
 
-	pci_dma_sync_single_for_cpu(tp->pci_dev, addr, pkt_size,
-				    PCI_DMA_FROMDEVICE);
+	dma_sync_single_for_cpu(&tp->pci_dev->dev, addr, pkt_size,
+				PCI_DMA_FROMDEVICE);
 	skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size);
 	*sk_buff = skb;
 	done = true;
@@ -4552,11 +4557,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 			}
 
 			if (rtl8169_try_rx_copy(&skb, tp, pkt_size, addr)) {
-				pci_dma_sync_single_for_device(pdev, addr,
+				dma_sync_single_for_device(&pdev->dev, addr,
 					pkt_size, PCI_DMA_FROMDEVICE);
 				rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
 			} else {
-				pci_unmap_single(pdev, addr, tp->rx_buf_sz,
+				dma_unmap_single(&pdev->dev, addr, tp->rx_buf_sz,
 						 PCI_DMA_FROMDEVICE);
 				tp->Rx_skbuff[entry] = NULL;
 			}
@@ -4773,10 +4778,10 @@ static int rtl8169_close(struct net_device *dev)
 
 	free_irq(dev->irq, dev);
 
-	pci_free_consistent(pdev, R8169_RX_RING_BYTES, tp->RxDescArray,
-			    tp->RxPhyAddr);
-	pci_free_consistent(pdev, R8169_TX_RING_BYTES, tp->TxDescArray,
-			    tp->TxPhyAddr);
+	dma_free_coherent(&pdev->dev, R8169_RX_RING_BYTES, tp->RxDescArray,
+			  tp->RxPhyAddr);
+	dma_free_coherent(&pdev->dev, R8169_TX_RING_BYTES, tp->TxDescArray,
+			  tp->TxPhyAddr);
 	tp->TxDescArray = NULL;
 	tp->RxDescArray = NULL;
 
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 14:25 [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
  2010-10-08 14:25 ` [PATCH 2/2] r8169: use device model DMA API Stanislaw Gruszka
@ 2010-10-08 14:52 ` Stanislaw Gruszka
  2010-10-08 15:04   ` Eric Dumazet
  2010-10-09  7:54 ` Eric Dumazet
  2 siblings, 1 reply; 18+ messages in thread
From: Stanislaw Gruszka @ 2010-10-08 14:52 UTC (permalink / raw)
  To: Francois Romieu, netdev

On Fri, Oct 08, 2010 at 04:25:00PM +0200, Stanislaw Gruszka wrote:
> We have fedora bug report where driver fail to initialize after
> suspend/resume because of memory allocation errors:
> https://bugzilla.redhat.com/show_bug.cgi?id=629158

There is one more thing to do regarding the above. Call traces from the bug
reports show that order-3 allocations fail. On an arch with 4kB pages,
order 3 means a 32kB allocation. We want to allocate 16kB, but the
internal sk_buff data pushes us over that boundary, so we take 32kB
from the allocator, wasting almost 50%.

To fix this we can use a method similar to the niu or iwlwifi drivers:
allocate pages directly from the buddy allocator and attach them to the skb
(with skb_add_rx_frag, for example). I'm going to prepare such a patch, but
I have one doubt: what happens if the system page size is bigger
than 16kB? Should I care about that case?

Stanislaw

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 14:52 ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
@ 2010-10-08 15:04   ` Eric Dumazet
  2010-10-08 16:03     ` Stanislaw Gruszka
  2010-10-11 16:03     ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Christoph Lameter
  0 siblings, 2 replies; 18+ messages in thread
From: Eric Dumazet @ 2010-10-08 15:04 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: Francois Romieu, netdev

Le vendredi 08 octobre 2010 à 16:52 +0200, Stanislaw Gruszka a écrit :
> On Fri, Oct 08, 2010 at 04:25:00PM +0200, Stanislaw Gruszka wrote:
> > We have fedora bug report where driver fail to initialize after
> > suspend/resume because of memory allocation errors:
> > https://bugzilla.redhat.com/show_bug.cgi?id=629158
> 
> There is also one more thing to do regarding above. Calltraces from bug
> reports, shows that order 3 allocation fail. On arch with 4kB pages,
> order 3 mean 32kB allocation. We want to alloc 16kB, but there is also
> internal sk_buff data what make that we exceed the boundary and take
> 32kB from allocator, getting almost 50% wastage.
> 

Or it's only a 1460+overhead allocation, and SLUB uses order-3 pages to
satisfy 2048-byte allocations.

# grep 2048 /proc/slabinfo 
kmalloc-2048        8664   8752   2048   16    8 : tunables    0    0
0 : slabdata    547    547      0


The 8 in the <pagesperslab> column says just that: order-3 pages, even for
small allocations.

Switch to SLAB -> no more problem ;)


> To fix we can use similar method as in niu or iwlwifi drivers, alloc
> pages directly form buddy allocator and attach them to skb (by
> skb_add_rx_frag for example). I'm going to prepare such patch, but
> I have one doubt, what happens if page size in system is bigger
> than 16kB, should I care about such case? 

Seems tricky. Should we patch all drivers to do something like that ?




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 15:04   ` Eric Dumazet
@ 2010-10-08 16:03     ` Stanislaw Gruszka
  2010-10-08 16:27       ` Eric Dumazet
  2010-10-09 15:59       ` [PATCH] net: introduce alloc_skb_order0 Eric Dumazet
  2010-10-11 16:03     ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Christoph Lameter
  1 sibling, 2 replies; 18+ messages in thread
From: Stanislaw Gruszka @ 2010-10-08 16:03 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Francois Romieu, netdev

On Fri, Oct 08, 2010 at 05:04:07PM +0200, Eric Dumazet wrote:
> Le vendredi 08 octobre 2010 à 16:52 +0200, Stanislaw Gruszka a écrit :
> > On Fri, Oct 08, 2010 at 04:25:00PM +0200, Stanislaw Gruszka wrote:
> > > We have fedora bug report where driver fail to initialize after
> > > suspend/resume because of memory allocation errors:
> > > https://bugzilla.redhat.com/show_bug.cgi?id=629158
> > 
> > There is also one more thing to do regarding above. Calltraces from bug
> > reports, shows that order 3 allocation fail. On arch with 4kB pages,
> > order 3 mean 32kB allocation. We want to alloc 16kB, but there is also
> > internal sk_buff data what make that we exceed the boundary and take
> > 32kB from allocator, getting almost 50% wastage.
> > 
> 
> Or its only an 1460+overhead allocation, and SLUB uses order-3 pages to
> satisfy 2048 bytes allocations.

Rather not: the trace shows the failure in rtl8169_rx_fill, where we allocate
rx buffers, and these are 16kB each by default.

> Switch to SLAB -> no more problem ;)

Yeah, I wish I could, but Fedora uses SLUB because of some of its debugging
capabilities.

> > To fix we can use similar method as in niu or iwlwifi drivers, alloc
> > pages directly form buddy allocator and attach them to skb (by
> > skb_add_rx_frag for example). I'm going to prepare such patch, but
> > I have one doubt, what happens if page size in system is bigger
> > than 16kB, should I care about such case? 
> 
> Seems tricky. Should we patch all drivers to do something like that ?

I think only those drivers which do alloc_skb(n*PAGE_SIZE).
As an alternative, we could be smarter in alloc_skb.

Stanislaw
> 
> 
> 

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 16:03     ` Stanislaw Gruszka
@ 2010-10-08 16:27       ` Eric Dumazet
  2010-10-09 15:59       ` [PATCH] net: introduce alloc_skb_order0 Eric Dumazet
  1 sibling, 0 replies; 18+ messages in thread
From: Eric Dumazet @ 2010-10-08 16:27 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: Francois Romieu, netdev

Le vendredi 08 octobre 2010 à 18:03 +0200, Stanislaw Gruszka a écrit :
> On Fri, Oct 08, 2010 at 05:04:07PM +0200, Eric Dumazet wrote:
> > Le vendredi 08 octobre 2010 à 16:52 +0200, Stanislaw Gruszka a écrit :
> > > On Fri, Oct 08, 2010 at 04:25:00PM +0200, Stanislaw Gruszka wrote:
> > > > We have fedora bug report where driver fail to initialize after
> > > > suspend/resume because of memory allocation errors:
> > > > https://bugzilla.redhat.com/show_bug.cgi?id=629158
> > > 
> > > There is also one more thing to do regarding above. Calltraces from bug
> > > reports, shows that order 3 allocation fail. On arch with 4kB pages,
> > > order 3 mean 32kB allocation. We want to alloc 16kB, but there is also
> > > internal sk_buff data what make that we exceed the boundary and take
> > > 32kB from allocator, getting almost 50% wastage.
> > > 
> > 
> > Or its only an 1460+overhead allocation, and SLUB uses order-3 pages to
> > satisfy 2048 bytes allocations.
> 
> Rather not, trace show failure in rtl8169_rx_fill, where we allocate rx
> buffers and these are 16kB big by default.
> 

That only happens when the rx buffers are filled with GFP_KERNEL (after your
patch is applied, of course). This should succeed. If not, the driver cannot
load and function, since this NIC really needs 16KB buffers in order to avoid
a hardware bug.

Once allocated for the RX rings, we never free them (we never give these skbs
to the upper stack): when we receive a frame, we copybreak it (using
GFP_ATOMIC), so the allocation size depends on the MTU.

With MTU=1500, I am pretty sure we allocate 2048 bytes chunks, not more.


> I think, only on these drivers which do alloc_skb(n*PAGE_SIZE).
> As alternative we can be smarter in alloc_skb.

Only if MTU is non standard, then.

I repeat: with the standard MTU=1500, we don't allocate huge skbs in the rx
path, only small (<2048 bytes) ones.

For bigger frames, you might then allocate fragments, using pages, and
not care whether PAGE_SIZE is 64Kbytes.




^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 14:25 [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
  2010-10-08 14:25 ` [PATCH 2/2] r8169: use device model DMA API Stanislaw Gruszka
  2010-10-08 14:52 ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
@ 2010-10-09  7:54 ` Eric Dumazet
  2010-10-09 16:17   ` David Miller
  2 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-09  7:54 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: Francois Romieu, netdev

Le vendredi 08 octobre 2010 à 16:25 +0200, Stanislaw Gruszka a écrit :
> We have fedora bug report where driver fail to initialize after
> suspend/resume because of memory allocation errors:
> https://bugzilla.redhat.com/show_bug.cgi?id=629158
> 
> To fix use GFP_KERNEL allocation where possible.
> 
> Tested-by: Neal Becker <ndbecker2@gmail.com>
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] r8169: use device model DMA API
  2010-10-08 14:25 ` [PATCH 2/2] r8169: use device model DMA API Stanislaw Gruszka
@ 2010-10-09  7:57   ` Eric Dumazet
  2010-10-09 16:17     ` David Miller
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-09  7:57 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: Francois Romieu, netdev

Le vendredi 08 octobre 2010 à 16:25 +0200, Stanislaw Gruszka a écrit :
> Use DMA API as PCI equivalents will be deprecated. This change also
> allow to allocate with GFP_KERNEL where possible.
> 
> Tested-by: Neal Becker <ndbecker2@gmail.com>
> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> ---
>  drivers/net/r8169.c |   53 +++++++++++++++++++++++++++-----------------------
>  1 files changed, 29 insertions(+), 24 deletions(-)

Acked-by: Eric Dumazet <eric.dumazet@gmail.com>



^ permalink raw reply	[flat|nested] 18+ messages in thread

* [PATCH] net: introduce alloc_skb_order0
  2010-10-08 16:03     ` Stanislaw Gruszka
  2010-10-08 16:27       ` Eric Dumazet
@ 2010-10-09 15:59       ` Eric Dumazet
  2010-10-11 15:55         ` Stanislaw Gruszka
  1 sibling, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-09 15:59 UTC (permalink / raw)
  To: Stanislaw Gruszka, David Miller; +Cc: Francois Romieu, netdev

Le vendredi 08 octobre 2010 à 18:03 +0200, Stanislaw Gruszka a écrit :
> On Fri, Oct 08, 2010 at 05:04:07PM +0200, Eric Dumazet wrote:

> > Switch to SLAB -> no more problem ;)
> 
> yeh, I wish to, but fedora use SLUB because of some debugging
> capabilities. 

Yes, of course, I was kidding :)

echo 0 >/sys/kernel/slab/kmalloc-2048/order
echo 0 >/sys/kernel/slab/kmalloc-1024/order
echo 0 >/sys/kernel/slab/kmalloc-512/order

Should do the trick : No more high order allocations for MTU=1500
frames.


For MTU=9000 frames, we probably need something like this patch :

(Not yet for inclusion, this is an RFC, this will need two separate
patches)

[PATCH] net: introduce alloc_skb_order0()

Reception of big frames hits a memory allocation problem because of
high-order page allocations (sometimes order-3 for MTU=9000). This patch
introduces alloc_skb_order0(), to build skbs with order-0 pages only.

Their headlen is at most SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN)
(3648 bytes on x86_64, 3840 bytes on x86_32)

As net drivers might use skb_store_bits() to copy data to this newly
allocated skb, could we even use __GFP_HIGHMEM for the fragments?

Note: use the GFP_NOWAIT | __GFP_NOWARN mask to allocate pages, since we
don't want to let big packets exhaust the GFP_ATOMIC pool.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/r8169.c    |   19 ++++++---------
 include/linux/skbuff.h |    1 
 net/core/skbuff.c      |   47 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+), 11 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index fe3b762..f4220db 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4468,27 +4468,24 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
 		skb_checksum_none_assert(skb);
 }
 
-static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff,
+static inline bool rtl8169_try_rx_copy(struct sk_buff **pskb,
 				       struct rtl8169_private *tp, int pkt_size,
 				       dma_addr_t addr)
 {
 	struct sk_buff *skb;
-	bool done = false;
 
 	if (pkt_size >= rx_copybreak)
-		goto out;
+		return false;
 
-	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
+	skb = alloc_skb_order0(pkt_size);
 	if (!skb)
-		goto out;
+		return false;
 
 	pci_dma_sync_single_for_cpu(tp->pci_dev, addr, pkt_size,
 				    PCI_DMA_FROMDEVICE);
-	skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size);
-	*sk_buff = skb;
-	done = true;
-out:
-	return done;
+	skb_store_bits(skb, 0, (*pskb)->data, pkt_size);
+	*pskb = skb;
+	return true;
 }
 
 /*
@@ -4559,10 +4556,10 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 				pci_unmap_single(pdev, addr, tp->rx_buf_sz,
 						 PCI_DMA_FROMDEVICE);
 				tp->Rx_skbuff[entry] = NULL;
+				skb_put(skb, pkt_size);
 			}
 
 			rtl8169_rx_csum(skb, status);
-			skb_put(skb, pkt_size);
 			skb->protocol = eth_type_trans(skb, dev);
 
 			if (rtl8169_rx_vlan_skb(tp, desc, skb, polling) < 0) {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0b53c43..2cc161a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1841,6 +1841,7 @@ extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
 				     void *to, int len);
 extern int	       skb_store_bits(struct sk_buff *skb, int offset,
 				      const void *from, int len);
+extern struct sk_buff *alloc_skb_order0(int pkt_size);
 extern __wsum	       skb_copy_and_csum_bits(const struct sk_buff *skb,
 					      int offset, u8 *to, int len,
 					      __wsum csum);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 752c197..4a6195d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1664,6 +1664,53 @@ fault:
 }
 EXPORT_SYMBOL(skb_store_bits);
 
+/**
+ * alloc_skb_order0 - allocate skb with order-0 requirements
+ * @pkt_size: packet size
+ * 
+ * Allocate an skb with a head small enough that skb->data should not
+ * require a high-order page allocation, completed with page fragments if
+ * pkt_size is too big. Meant for driver RX paths: we reserve NET_SKB_PAD +
+ * NET_IP_ALIGN bytes; the head uses GFP_ATOMIC, fragments use GFP_NOWAIT.
+ * skb->len is already set to pkt_size, so drivers must not call skb_put().
+ */
+struct sk_buff *alloc_skb_order0(int pkt_size)
+{
+	int head = min_t(int, pkt_size, SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN));
+	struct sk_buff *skb;
+
+	skb = alloc_skb(head + NET_SKB_PAD + NET_IP_ALIGN,
+			GFP_ATOMIC | __GFP_NOWARN);
+	if (!skb)
+		return NULL;
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+	skb_put(skb, head);
+	pkt_size -= head;
+
+	skb->len += pkt_size;
+	skb->data_len += pkt_size;
+	skb->truesize += pkt_size;
+	while (pkt_size) {
+		int i = skb_shinfo(skb)->nr_frags++;
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		int fragsize = min_t(int, pkt_size, PAGE_SIZE);
+		struct page *page = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
+
+		if (!page)
+			goto error;
+		frag->page = page;
+		frag->size = fragsize;
+		frag->page_offset = 0;
+		pkt_size -= fragsize;
+	}
+	return skb;
+
+error:
+	kfree_skb(skb);
+	return NULL;	
+}
+EXPORT_SYMBOL(alloc_skb_order0);
+
 /* Checksum skb data. */
 
 __wsum skb_checksum(const struct sk_buff *skb, int offset,



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-09  7:54 ` Eric Dumazet
@ 2010-10-09 16:17   ` David Miller
  0 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2010-10-09 16:17 UTC (permalink / raw)
  To: eric.dumazet; +Cc: sgruszka, romieu, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 09 Oct 2010 09:54:04 +0200

> Le vendredi 08 octobre 2010 à 16:25 +0200, Stanislaw Gruszka a écrit :
>> We have fedora bug report where driver fail to initialize after
>> suspend/resume because of memory allocation errors:
>> https://bugzilla.redhat.com/show_bug.cgi?id=629158
>> 
>> To fix use GFP_KERNEL allocation where possible.
>> 
>> Tested-by: Neal Becker <ndbecker2@gmail.com>
>> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 2/2] r8169: use device model DMA API
  2010-10-09  7:57   ` Eric Dumazet
@ 2010-10-09 16:17     ` David Miller
  0 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2010-10-09 16:17 UTC (permalink / raw)
  To: eric.dumazet; +Cc: sgruszka, romieu, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 09 Oct 2010 09:57:35 +0200

> Le vendredi 08 octobre 2010 à 16:25 +0200, Stanislaw Gruszka a écrit :
>> Use DMA API as PCI equivalents will be deprecated. This change also
>> allow to allocate with GFP_KERNEL where possible.
>> 
>> Tested-by: Neal Becker <ndbecker2@gmail.com>
>> Signed-off-by: Stanislaw Gruszka <sgruszka@redhat.com>
>> ---
>>  drivers/net/r8169.c |   53 +++++++++++++++++++++++++++-----------------------
>>  1 files changed, 29 insertions(+), 24 deletions(-)
> 
> Acked-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: introduce alloc_skb_order0
  2010-10-09 15:59       ` [PATCH] net: introduce alloc_skb_order0 Eric Dumazet
@ 2010-10-11 15:55         ` Stanislaw Gruszka
  2010-10-11 16:05           ` Eric Dumazet
  0 siblings, 1 reply; 18+ messages in thread
From: Stanislaw Gruszka @ 2010-10-11 15:55 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, Francois Romieu, netdev

On Sat, Oct 09, 2010 at 05:59:56PM +0200, Eric Dumazet wrote:
> Le vendredi 08 octobre 2010 à 18:03 +0200, Stanislaw Gruszka a écrit :
> > On Fri, Oct 08, 2010 at 05:04:07PM +0200, Eric Dumazet wrote:
> 
> > > Switch to SLAB -> no more problem ;)
> > 
> > yeh, I wish to, but fedora use SLUB because of some debugging
> > capabilities. 
> 
> Yes, of course, I was kidding :)
> 
> echo 0 >/sys/kernel/slab/kmalloc-2048/order
> echo 0 >/sys/kernel/slab/kmalloc-1024/order
> echo 0 >/sys/kernel/slab/kmalloc-512/order
> 
> Should do the trick : No more high order allocations for MTU=1500
> frames.

So the SLUB is great, but we need a patch to avoid using it :-)

> For MTU=9000 frames, we probably need something like this patch :
>
> Reception of big frames hit a memory allocation problem, because of high
> order pages allocations (order-3 sometimes for MTU=9000). This patch
> introduces alloc_skb_order0(), to build skbs with order-0 pages only.

I have never seen allocation problems in rtl8169_try_rx_copy or in any
other driver's rx path (except iwlwifi, but that is now solved by using
skb_add_rx_frag), so I'm not sure we need this patch.

However, I see another benefit of that patch: we save memory. Allocating
for MTU 9000 gives something like skb->data = kmalloc(9000 + 32 + 2
+ 334). So we take the data from the kmalloc-16384 cache and waste about 7kB
on every allocation. With the patch, the wastage would be about 2kB per
allocation (assuming 4kB and 8kB page sizes).

However, I started this thread thinking about another source of memory wastage:
in rtl8169_alloc_rx_skb, skb->data = kmalloc(16383 + 32 + 2 + 334) is taken
from kmalloc-32768, which is almost 50% wastage.
 
> +struct sk_buff *alloc_skb_order0(int pkt_size)
> +{
> +	int head = min_t(int, pkt_size, SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN));
> +	struct sk_buff *skb;
> +
> +	skb = alloc_skb(head + NET_SKB_PAD + NET_IP_ALIGN,
> +			GFP_ATOMIC | __GFP_NOWARN);
> +	if (!skb)
> +		return NULL;
> +	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
> +	skb_put(skb, head);
> +	pkt_size -= head;
> +
> +	skb->len += pkt_size;
> +	skb->data_len += pkt_size;
> +	skb->truesize += pkt_size;
> +	while (pkt_size) {

if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS - 1)
	goto error;

> +		int i = skb_shinfo(skb)->nr_frags++;
> +		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
> +		int fragsize = min_t(int, pkt_size, PAGE_SIZE);
> +		struct page *page = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
> +
> +		if (!page)
> +			goto error;
> +		frag->page = page;
> +		frag->size = fragsize;
> +		frag->page_offset = 0;
> +		pkt_size -= fragsize;
> +	}
> +	return skb;
> +
> +error:
> +	kfree_skb(skb);
> +	return NULL;	
> +}
> +EXPORT_SYMBOL(alloc_skb_order0);
> +
>  /* Checksum skb data. */
>  
>  __wsum skb_checksum(const struct sk_buff *skb, int offset,

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-08 15:04   ` Eric Dumazet
  2010-10-08 16:03     ` Stanislaw Gruszka
@ 2010-10-11 16:03     ` Christoph Lameter
  2010-10-11 16:07       ` Eric Dumazet
  1 sibling, 1 reply; 18+ messages in thread
From: Christoph Lameter @ 2010-10-11 16:03 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Stanislaw Gruszka, Francois Romieu, netdev

On Fri, 8 Oct 2010, Eric Dumazet wrote:

> 8 in the <pagesperslab> column just says that : order-3 pages, even for
> small allocations.

Those allocations will fall back to smaller allocations if the page allocator
has trouble satisfying those requests.


^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: introduce alloc_skb_order0
  2010-10-11 15:55         ` Stanislaw Gruszka
@ 2010-10-11 16:05           ` Eric Dumazet
  2010-10-11 21:17             ` Eric Dumazet
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-11 16:05 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: David Miller, Francois Romieu, netdev

Le lundi 11 octobre 2010 à 17:55 +0200, Stanislaw Gruszka a écrit :
> On Sat, Oct 09, 2010 at 05:59:56PM +0200, Eric Dumazet wrote:
> > Le vendredi 08 octobre 2010 à 18:03 +0200, Stanislaw Gruszka a écrit :
> > > On Fri, Oct 08, 2010 at 05:04:07PM +0200, Eric Dumazet wrote:
> > 
> > > > Switch to SLAB -> no more problem ;)
> > > 
> > > yeh, I wish to, but fedora use SLUB because of some debugging
> > > capabilities. 
> > 
> > Yes, of course, I was kidding :)
> > 
> > echo 0 >/sys/kernel/slab/kmalloc-2048/order
> > echo 0 >/sys/kernel/slab/kmalloc-1024/order
> > echo 0 >/sys/kernel/slab/kmalloc-512/order
> > 
> > Should do the trick : No more high order allocations for MTU=1500
> > frames.
> 
> So the SLUB is great, but we need a patch to avoid using it :-)
> 
> > For MTU=9000 frames, we probably need something like this patch :
> >
> > Reception of big frames hit a memory allocation problem, because of high
> > order pages allocations (order-3 sometimes for MTU=9000). This patch
> > introduces alloc_skb_order0(), to build skbs with order-0 pages only.
> 
> I have never seen allocation problems in rtl8169_try_rx_copy or in any
> other driver rx path (except iwlwifi, but that is now solved by using
> skb_add_rx_frag), so I'm not sure we need this patch.
> 
> However, I see another benefit of that patch: we save memory. Allocating
> for MTU 9000 gives something like skb->data = kmalloc(9000 + 32 + 2
> + 334). So we take the data from the kmalloc-16384 cache and waste about
> 7kB on every allocation. With the patch, the wastage would be about 2kB
> per allocation (assuming 4kB and 8kB page sizes).
> 
> However I started this thread thinking about other memory wastage,
> in rtl8169_alloc_rx_skb, skb->data = kmalloc(16383 + 32 + 2 + 334), taken
> from kmalloc-32768, almost 50% wastage.
>  

You cannot use my patch to avoid this waste. Really.

You have two different things in this driver :

1) Allocation of a physically contiguous 16 Kbyte block for the rx-ring,
at device initialization (GFP_KERNEL is OK here)

   Here, the only thing you could do is to not allocate real skbs but
only 16KB data blocks (no need for the sk_buff, only the ->data part), and
force copybreak for all incoming packets (remove the rx_copybreak
tunable)

2) Allocation of order0 skb to perform the copybreak in rx path.
(GFP_ATOMIC) : My patch.


> > +struct sk_buff *alloc_skb_order0(int pkt_size)
> > +{
> > +	int head = min_t(int, pkt_size, SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN));
> > +	struct sk_buff *skb;
> > +
> > +	skb = alloc_skb(head + NET_SKB_PAD + NET_IP_ALIGN,
> > +			GFP_ATOMIC | __GFP_NOWARN);
> > +	if (!skb)
> > +		return NULL;
> > +	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
> > +	skb_put(skb, head);
> > +	pkt_size -= head;
> > +
> > +	skb->len += pkt_size;
> > +	skb->data_len += pkt_size;
> > +	skb->truesize += pkt_size;
> > +	while (pkt_size) {
> 
> if (skb_shinfo(skb)->nr_frags == MAX_SKB_FRAGS - 1)
> 	goto error;

Not needed. A frame is < 16383 bytes, so _must_ fit in an skb,
(skb can hold up to 64 Kbytes)

> 
> > +		int i = skb_shinfo(skb)->nr_frags++;
> > +		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
> > +		int fragsize = min_t(int, pkt_size, PAGE_SIZE);
> > +		struct page *page = alloc_page(GFP_NOWAIT | __GFP_NOWARN);
> > +
> > +		if (!page)
> > +			goto error;
> > +		frag->page = page;
> > +		frag->size = fragsize;
> > +		frag->page_offset = 0;
> > +		pkt_size -= fragsize;
> > +	}
> > +	return skb;
> > +
> > +error:
> > +	kfree_skb(skb);
> > +	return NULL;	
> > +}
> > +EXPORT_SYMBOL(alloc_skb_order0);
> > +
> >  /* Checksum skb data. */
> >  
> >  __wsum skb_checksum(const struct sk_buff *skb, int offset,



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-11 16:03     ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Christoph Lameter
@ 2010-10-11 16:07       ` Eric Dumazet
  2010-10-11 16:14         ` Christoph Lameter
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-11 16:07 UTC (permalink / raw)
  To: Christoph Lameter; +Cc: Stanislaw Gruszka, Francois Romieu, netdev

Le lundi 11 octobre 2010 à 11:03 -0500, Christoph Lameter a écrit :
> On Fri, 8 Oct 2010, Eric Dumazet wrote:
> 
> > 8 in the <pagesperslab> column just says that : order-3 pages, even for
> > small allocations.
> 
> Those allocations will fallback to smaller allocs if the page allocator
> has trouble satisfying those requests.
> 

Interesting, do you have an idea when this feature was added ?

Thanks



^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep
  2010-10-11 16:07       ` Eric Dumazet
@ 2010-10-11 16:14         ` Christoph Lameter
  0 siblings, 0 replies; 18+ messages in thread
From: Christoph Lameter @ 2010-10-11 16:14 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: Stanislaw Gruszka, Francois Romieu, netdev

[-- Attachment #1: Type: TEXT/PLAIN, Size: 498 bytes --]

On Mon, 11 Oct 2010, Eric Dumazet wrote:

> Le lundi 11 octobre 2010 à 11:03 -0500, Christoph Lameter a écrit :
> > On Fri, 8 Oct 2010, Eric Dumazet wrote:
> >
> > > 8 in the <pagesperslab> column just says that : order-3 pages, even for
> > > small allocations.
> >
> > Those allocations will fallback to smaller allocs if the page allocator
> > has trouble satisfying those requests.
> >
>
> Interesting, do you have an idea when this feature was added ?

A couple of years ago.

^ permalink raw reply	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: introduce alloc_skb_order0
  2010-10-11 16:05           ` Eric Dumazet
@ 2010-10-11 21:17             ` Eric Dumazet
  2010-10-16 18:53               ` David Miller
  0 siblings, 1 reply; 18+ messages in thread
From: Eric Dumazet @ 2010-10-11 21:17 UTC (permalink / raw)
  To: Stanislaw Gruszka; +Cc: David Miller, Francois Romieu, netdev


> 1) Allocation of a physically contiguous 16 Kbyte block for the rx-ring,
> at device initialization (GFP_KERNEL is OK here)
> 
>    Here, the only thing you could do is to not allocate real skbs but
> only 16KB data blocks (no need for the sk_buff, only the ->data part), and
> force copybreak for all incoming packets (remove the rx_copybreak
> tunable)
> 

Here is the patch I cooked for net-next-2.6 to implement this idea.

I tested it on a dev machine and it works well.

[PATCH net-next] r8169: use 50% less ram for RX ring

Using standard skb allocations in r8169 leads to order-3 allocations (if
PAGE_SIZE=4096), because the NIC needs 16383 bytes and the skb overhead
pushes this above 16384, so 32768 bytes are used per "skb".

Using kmalloc() lets us reduce the memory requirements of one r8169 NIC
by 4 Mbytes (256 frames * 16 Kbytes). This is fine since a hardware bug
requires us to copy incoming frames anyway, so we build a real skb when
doing this copy.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 drivers/net/r8169.c |  183 ++++++++++++++----------------------------
 1 file changed, 64 insertions(+), 119 deletions(-)

diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index bc669a4..1760533 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -187,12 +187,7 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = {
 
 MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
 
-/*
- * we set our copybreak very high so that we don't have
- * to allocate 16k frames all the time (see note in
- * rtl8169_open()
- */
-static int rx_copybreak = 16383;
+static int rx_buf_sz = 16383;
 static int use_dac;
 static struct {
 	u32 msg_enable;
@@ -484,10 +479,8 @@ struct rtl8169_private {
 	struct RxDesc *RxDescArray;	/* 256-aligned Rx descriptor ring */
 	dma_addr_t TxPhyAddr;
 	dma_addr_t RxPhyAddr;
-	struct sk_buff *Rx_skbuff[NUM_RX_DESC];	/* Rx data buffers */
+	void *Rx_databuff[NUM_RX_DESC];	/* Rx data buffers */
 	struct ring_info tx_skb[NUM_TX_DESC];	/* Tx data buffers */
-	unsigned align;
-	unsigned rx_buf_sz;
 	struct timer_list timer;
 	u16 cp_cmd;
 	u16 intr_event;
@@ -515,8 +508,6 @@ struct rtl8169_private {
 
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
 MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
-module_param(rx_copybreak, int, 0);
-MODULE_PARM_DESC(rx_copybreak, "Copy breakpoint for copy-only-tiny-frames");
 module_param(use_dac, int, 0);
 MODULE_PARM_DESC(use_dac, "Enable PCI DAC. Unsafe on 32 bit PCI slot.");
 module_param_named(debug, debug.msg_enable, int, 0);
@@ -3196,7 +3187,6 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->features |= NETIF_F_GRO;
 
 	tp->intr_mask = 0xffff;
-	tp->align = cfg->align;
 	tp->hw_start = cfg->hw_start;
 	tp->intr_event = cfg->intr_event;
 	tp->napi_event = cfg->napi_event;
@@ -3266,18 +3256,6 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 }
 
-static void rtl8169_set_rxbufsize(struct rtl8169_private *tp,
-				  unsigned int mtu)
-{
-	unsigned int max_frame = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
-
-	if (max_frame != 16383)
-		printk(KERN_WARNING PFX "WARNING! Changing of MTU on this "
-			"NIC may lead to frame reception errors!\n");
-
-	tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE;
-}
-
 static int rtl8169_open(struct net_device *dev)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
@@ -3287,18 +3265,6 @@ static int rtl8169_open(struct net_device *dev)
 	pm_runtime_get_sync(&pdev->dev);
 
 	/*
-	 * Note that we use a magic value here, its wierd I know
-	 * its done because, some subset of rtl8169 hardware suffers from
-	 * a problem in which frames received that are longer than
-	 * the size set in RxMaxSize register return garbage sizes
-	 * when received.  To avoid this we need to turn off filtering,
-	 * which is done by setting a value of 16383 in the RxMaxSize register
-	 * and allocating 16k frames to handle the largest possible rx value
-	 * thats what the magic math below does.
-	 */
-	rtl8169_set_rxbufsize(tp, 16383 - VLAN_ETH_HLEN - ETH_FCS_LEN);
-
-	/*
 	 * Rx and Tx desscriptors needs 256 bytes alignment.
 	 * dma_alloc_coherent provides more.
 	 */
@@ -3474,7 +3440,7 @@ static void rtl_hw_start_8169(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
+	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
 
 	if ((tp->mac_version == RTL_GIGA_MAC_VER_01) ||
 	    (tp->mac_version == RTL_GIGA_MAC_VER_02) ||
@@ -3735,7 +3701,7 @@ static void rtl_hw_start_8168(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
+	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
 
 	tp->cp_cmd |= RTL_R16(CPlusCmd) | PktCntrDisable | INTT_1;
 
@@ -3915,7 +3881,7 @@ static void rtl_hw_start_8101(struct net_device *dev)
 
 	RTL_W8(EarlyTxThres, EarlyTxThld);
 
-	rtl_set_rx_max_size(ioaddr, tp->rx_buf_sz);
+	rtl_set_rx_max_size(ioaddr, rx_buf_sz);
 
 	tp->cp_cmd |= rtl_rw_cpluscmd(ioaddr) | PCIMulRW;
 
@@ -3956,8 +3922,6 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 
 	rtl8169_down(dev);
 
-	rtl8169_set_rxbufsize(tp, dev->mtu);
-
 	ret = rtl8169_init_ring(dev);
 	if (ret < 0)
 		goto out;
@@ -3978,15 +3942,15 @@ static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
 	desc->opts1 &= ~cpu_to_le32(DescOwn | RsvdMask);
 }
 
-static void rtl8169_free_rx_skb(struct rtl8169_private *tp,
-				struct sk_buff **sk_buff, struct RxDesc *desc)
+static void rtl8169_free_rx_databuff(struct rtl8169_private *tp,
+				     void **data_buff, struct RxDesc *desc)
 {
 	struct pci_dev *pdev = tp->pci_dev;
 
-	dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), tp->rx_buf_sz,
+	dma_unmap_single(&pdev->dev, le64_to_cpu(desc->addr), rx_buf_sz,
 			 PCI_DMA_FROMDEVICE);
-	dev_kfree_skb(*sk_buff);
-	*sk_buff = NULL;
+	kfree(*data_buff);
+	*data_buff = NULL;
 	rtl8169_make_unusable_by_asic(desc);
 }
 
@@ -4005,33 +3969,34 @@ static inline void rtl8169_map_to_asic(struct RxDesc *desc, dma_addr_t mapping,
 	rtl8169_mark_to_asic(desc, rx_buf_sz);
 }
 
-static struct sk_buff *rtl8169_alloc_rx_skb(struct pci_dev *pdev,
+static inline void *rtl8169_align(void *data)
+{
+	return (void *)ALIGN((long)data, 16);
+}
+
+static struct sk_buff *rtl8169_alloc_rx_data(struct pci_dev *pdev,
 					    struct net_device *dev,
-					    struct RxDesc *desc, int rx_buf_sz,
-					    unsigned int align, gfp_t gfp)
+					    struct RxDesc *desc)
 {
-	struct sk_buff *skb;
+	void *data;
 	dma_addr_t mapping;
-	unsigned int pad;
+	int node = dev->dev.parent ? dev_to_node(dev->dev.parent) : -1;
 
-	pad = align ? align : NET_IP_ALIGN;
+	data = kmalloc_node(rx_buf_sz, GFP_KERNEL, node);
+	if (!data)
+		return NULL;
 
-	skb = __netdev_alloc_skb(dev, rx_buf_sz + pad, gfp);
-	if (!skb)
-		goto err_out;
-
-	skb_reserve(skb, align ? ((pad - 1) & (unsigned long)skb->data) : pad);
-
-	mapping = dma_map_single(&pdev->dev, skb->data, rx_buf_sz,
+	if (rtl8169_align(data) != data) {
+		kfree(data);
+		data = kmalloc_node(rx_buf_sz + 15, GFP_KERNEL, node);
+		if (!data)
+			return NULL;
+	}
+	mapping = dma_map_single(&pdev->dev, rtl8169_align(data), rx_buf_sz,
 				 PCI_DMA_FROMDEVICE);
 
 	rtl8169_map_to_asic(desc, mapping, rx_buf_sz);
-out:
-	return skb;
-
-err_out:
-	rtl8169_make_unusable_by_asic(desc);
-	goto out;
+	return data;
 }
 
 static void rtl8169_rx_clear(struct rtl8169_private *tp)
@@ -4039,8 +4004,8 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
 	unsigned int i;
 
 	for (i = 0; i < NUM_RX_DESC; i++) {
-		if (tp->Rx_skbuff[i]) {
-			rtl8169_free_rx_skb(tp, tp->Rx_skbuff + i,
+		if (tp->Rx_databuff[i]) {
+			rtl8169_free_rx_databuff(tp, tp->Rx_databuff + i,
 					    tp->RxDescArray + i);
 		}
 	}
@@ -4052,21 +4017,21 @@ static u32 rtl8169_rx_fill(struct rtl8169_private *tp, struct net_device *dev,
 	u32 cur;
 
 	for (cur = start; end - cur != 0; cur++) {
-		struct sk_buff *skb;
+		void *data;
 		unsigned int i = cur % NUM_RX_DESC;
 
 		WARN_ON((s32)(end - cur) < 0);
 
-		if (tp->Rx_skbuff[i])
+		if (tp->Rx_databuff[i])
 			continue;
 
-		skb = rtl8169_alloc_rx_skb(tp->pci_dev, dev,
-					   tp->RxDescArray + i,
-					   tp->rx_buf_sz, tp->align, gfp);
-		if (!skb)
+		data = rtl8169_alloc_rx_data(tp->pci_dev, dev,
+					     tp->RxDescArray + i);
+		if (!data) {
+			rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
 			break;
-
-		tp->Rx_skbuff[i] = skb;
+		}
+		tp->Rx_databuff[i] = data;
 	}
 	return cur - start;
 }
@@ -4088,7 +4053,7 @@ static int rtl8169_init_ring(struct net_device *dev)
 	rtl8169_init_ring_indexes(tp);
 
 	memset(tp->tx_skb, 0x0, NUM_TX_DESC * sizeof(struct ring_info));
-	memset(tp->Rx_skbuff, 0x0, NUM_RX_DESC * sizeof(struct sk_buff *));
+	memset(tp->Rx_databuff, 0x0, NUM_RX_DESC * sizeof(void *));
 
 	if (rtl8169_rx_fill(tp, dev, 0, NUM_RX_DESC, GFP_KERNEL) != NUM_RX_DESC)
 		goto err_out;
@@ -4473,27 +4438,23 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
 		skb_checksum_none_assert(skb);
 }
 
-static inline bool rtl8169_try_rx_copy(struct sk_buff **sk_buff,
-				       struct rtl8169_private *tp, int pkt_size,
-				       dma_addr_t addr)
+static struct sk_buff *rtl8169_try_rx_copy(void *data,
+					   struct rtl8169_private *tp,
+					   int pkt_size,
+					   dma_addr_t addr)
 {
 	struct sk_buff *skb;
-	bool done = false;
-
-	if (pkt_size >= rx_copybreak)
-		goto out;
-
-	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
-	if (!skb)
-		goto out;
 
+	data = rtl8169_align(data);
 	dma_sync_single_for_cpu(&tp->pci_dev->dev, addr, pkt_size,
 				PCI_DMA_FROMDEVICE);
-	skb_copy_from_linear_data(*sk_buff, skb->data, pkt_size);
-	*sk_buff = skb;
-	done = true;
-out:
-	return done;
+	prefetch(data);
+	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
+	if (skb)
+		memcpy(skb->data, data, pkt_size);
+	dma_sync_single_for_device(&tp->pci_dev->dev, addr, pkt_size,
+				   PCI_DMA_FROMDEVICE);
+	return skb;
 }
 
 /*
@@ -4508,7 +4469,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 				void __iomem *ioaddr, u32 budget)
 {
 	unsigned int cur_rx, rx_left;
-	unsigned int delta, count;
+	unsigned int count;
 	int polling = (budget != ~(u32)0) ? 1 : 0;
 
 	cur_rx = tp->cur_rx;
@@ -4537,12 +4498,11 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 				rtl8169_schedule_work(dev, rtl8169_reset_task);
 				dev->stats.rx_fifo_errors++;
 			}
-			rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
+			rtl8169_mark_to_asic(desc, rx_buf_sz);
 		} else {
-			struct sk_buff *skb = tp->Rx_skbuff[entry];
+			struct sk_buff *skb;
 			dma_addr_t addr = le64_to_cpu(desc->addr);
 			int pkt_size = (status & 0x00001FFF) - 4;
-			struct pci_dev *pdev = tp->pci_dev;
 
 			/*
 			 * The driver does not support incoming fragmented
@@ -4552,18 +4512,16 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 			if (unlikely(rtl8169_fragmented_frame(status))) {
 				dev->stats.rx_dropped++;
 				dev->stats.rx_length_errors++;
-				rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
+				rtl8169_mark_to_asic(desc, rx_buf_sz);
 				continue;
 			}
 
-			if (rtl8169_try_rx_copy(&skb, tp, pkt_size, addr)) {
-				dma_sync_single_for_device(&pdev->dev, addr,
-					pkt_size, PCI_DMA_FROMDEVICE);
-				rtl8169_mark_to_asic(desc, tp->rx_buf_sz);
-			} else {
-				dma_unmap_single(&pdev->dev, addr, tp->rx_buf_sz,
-						 PCI_DMA_FROMDEVICE);
-				tp->Rx_skbuff[entry] = NULL;
+			skb = rtl8169_try_rx_copy(tp->Rx_databuff[entry],
+						  tp, pkt_size, addr);
+			rtl8169_mark_to_asic(desc, rx_buf_sz);
+			if (!skb) {
+				dev->stats.rx_dropped++;
+				continue;
 			}
 
 			rtl8169_rx_csum(skb, status);
@@ -4592,20 +4550,7 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
 	count = cur_rx - tp->cur_rx;
 	tp->cur_rx = cur_rx;
 
-	delta = rtl8169_rx_fill(tp, dev, tp->dirty_rx, tp->cur_rx, GFP_ATOMIC);
-	if (!delta && count)
-		netif_info(tp, intr, dev, "no Rx buffer allocated\n");
-	tp->dirty_rx += delta;
-
-	/*
-	 * FIXME: until there is periodic timer to try and refill the ring,
-	 * a temporary shortage may definitely kill the Rx process.
-	 * - disable the asic to try and avoid an overflow and kick it again
-	 *   after refill ?
-	 * - how do others driver handle this condition (Uh oh...).
-	 */
-	if (tp->dirty_rx + NUM_RX_DESC == tp->cur_rx)
-		netif_emerg(tp, intr, dev, "Rx buffers exhausted\n");
+	tp->dirty_rx += count;
 
 	return count;
 }



^ permalink raw reply related	[flat|nested] 18+ messages in thread

* Re: [PATCH] net: introduce alloc_skb_order0
  2010-10-11 21:17             ` Eric Dumazet
@ 2010-10-16 18:53               ` David Miller
  0 siblings, 0 replies; 18+ messages in thread
From: David Miller @ 2010-10-16 18:53 UTC (permalink / raw)
  To: eric.dumazet; +Cc: sgruszka, romieu, netdev

From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 11 Oct 2010 23:17:47 +0200

> [PATCH net-next] r8169: use 50% less ram for RX ring
> 
> Using standard skb allocations in r8169 leads to order-3 allocations (if
> PAGE_SIZE=4096), because NIC needs 16383 bytes, and skb overhead makes
> this bigger than 16384 -> 32768 bytes per "skb"
> 
> Using kmalloc() permits to reduce memory requirements of one r8169 nic
> by 4Mbytes. (256 frames * 16Kbytes). This is fine since a hardware bug
> requires us to copy incoming frames, so we build real skb when doing
> this copy.
> 
> Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>

Applied.

^ permalink raw reply	[flat|nested] 18+ messages in thread

end of thread, other threads:[~2010-10-16 18:53 UTC | newest]

Thread overview: 18+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-08 14:25 [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
2010-10-08 14:25 ` [PATCH 2/2] r8169: use device model DMA API Stanislaw Gruszka
2010-10-09  7:57   ` Eric Dumazet
2010-10-09 16:17     ` David Miller
2010-10-08 14:52 ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Stanislaw Gruszka
2010-10-08 15:04   ` Eric Dumazet
2010-10-08 16:03     ` Stanislaw Gruszka
2010-10-08 16:27       ` Eric Dumazet
2010-10-09 15:59       ` [PATCH] net: introduce alloc_skb_order0 Eric Dumazet
2010-10-11 15:55         ` Stanislaw Gruszka
2010-10-11 16:05           ` Eric Dumazet
2010-10-11 21:17             ` Eric Dumazet
2010-10-16 18:53               ` David Miller
2010-10-11 16:03     ` [PATCH 1/2] r8169: allocate with GFP_KERNEL flag when able to sleep Christoph Lameter
2010-10-11 16:07       ` Eric Dumazet
2010-10-11 16:14         ` Christoph Lameter
2010-10-09  7:54 ` Eric Dumazet
2010-10-09 16:17   ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).