linux-wireless.vger.kernel.org archive mirror
* [PATCH v2] wifi: mt76: fix potential memory leakage
@ 2022-12-19  4:48 Bo Jiao
  2022-12-19  9:55 ` Lorenzo Bianconi
  0 siblings, 1 reply; 14+ messages in thread
From: Bo Jiao @ 2022-12-19  4:48 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: linux-wireless, Ryder Lee, Sujuan Chen, Shayne Chen, Evelyn Tsai,
	linux-mediatek, Bo Jiao

From: Bo Jiao <Bo.Jiao@mediatek.com>

Fix potential memory leakage: recycle the rxwi when the mt76_dma_add_buf() call fails.

Signed-off-by: Bo Jiao <Bo.Jiao@mediatek.com>
---
v2:
- recycle the rxwi when page_frag_alloc() or dma_map_single() fails.
---
 drivers/net/wireless/mediatek/mt76/dma.c | 27 ++++++++++++++----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index fc24b35..76ad47d 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -580,24 +580,29 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 
 		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
 		if (!buf)
-			break;
+			goto out;
 
 		addr = dma_map_single(dev->dma_dev, buf, len, DMA_FROM_DEVICE);
-		if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
-			skb_free_frag(buf);
-			break;
-		}
+		if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
+			goto free;
 
 		qbuf.addr = addr + offset;
 		qbuf.len = len - offset;
 		qbuf.skip_unmap = false;
-		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0) {
-			dma_unmap_single(dev->dma_dev, addr, len,
-					 DMA_FROM_DEVICE);
-			skb_free_frag(buf);
-			break;
-		}
+		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0)
+			goto umap;
+
 		frames++;
+		continue;
+
+umap:
+		dma_unmap_single(dev->dma_dev, addr, len,
+				 DMA_FROM_DEVICE);
+free:
+		skb_free_frag(buf);
+out:
+		mt76_put_rxwi(dev, t);
+		break;
 	}
 
 	if (frames)
-- 
2.18.0



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-19  4:48 [PATCH v2] wifi: mt76: fix potential memory leakage Bo Jiao
@ 2022-12-19  9:55 ` Lorenzo Bianconi
  2022-12-20  4:42   ` Sujuan Chen (陈素娟)
  0 siblings, 1 reply; 14+ messages in thread
From: Lorenzo Bianconi @ 2022-12-19  9:55 UTC (permalink / raw)
  To: Bo Jiao
  Cc: Felix Fietkau, linux-wireless, Ryder Lee, Sujuan Chen,
	Shayne Chen, Evelyn Tsai, linux-mediatek


> From: Bo Jiao <Bo.Jiao@mediatek.com>
> 
> fix potential memory leakage, recycle rxwi when mt76_dma_add_buf() call fail.
> 
> Signed-off-by: Bo Jiao <Bo.Jiao@mediatek.com>
> ---
> v2:
> - recycle rxwi when page_frag_alloc() and dma_map_single() fail.
> ---
>  drivers/net/wireless/mediatek/mt76/dma.c | 27 ++++++++++++++----------
>  1 file changed, 16 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
> index fc24b35..76ad47d 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -580,24 +580,29 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  
>  		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
>  		if (!buf)
> -			break;
> +			goto out;
>  
>  		addr = dma_map_single(dev->dma_dev, buf, len, DMA_FROM_DEVICE);
> -		if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
> -			skb_free_frag(buf);
> -			break;
> -		}
> +		if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
> +			goto free;
>  
>  		qbuf.addr = addr + offset;
>  		qbuf.len = len - offset;
>  		qbuf.skip_unmap = false;
> -		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0) {
> -			dma_unmap_single(dev->dma_dev, addr, len,
> -					 DMA_FROM_DEVICE);
> -			skb_free_frag(buf);
> -			break;
> -		}
> +		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0)
> +			goto umap;
> +
>  		frames++;
> +		continue;
> +
> +umap:
> +		dma_unmap_single(dev->dma_dev, addr, len,
> +				 DMA_FROM_DEVICE);
> +free:
> +		skb_free_frag(buf);
> +out:
> +		mt76_put_rxwi(dev, t);
> +		break;
>  	}
>  
>  	if (frames)

Hi Bo,

I guess the code is a bit more readable written as below, what do you think?

Regards,
Lorenzo

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index fad5fe19fe18..001538f698f1 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -571,13 +571,6 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 		struct mt76_queue_buf qbuf;
 		void *buf = NULL;
 
-		if ((q->flags & MT_QFLAG_WED) &&
-		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
-			t = mt76_get_rxwi(dev);
-			if (!t)
-				break;
-		}
-
 		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
 		if (!buf)
 			break;
@@ -588,16 +581,27 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 			break;
 		}
 
+		if ((q->flags & MT_QFLAG_WED) &&
+		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
+			t = mt76_get_rxwi(dev);
+			if (!t)
+				goto unmap;
+		}
+
 		qbuf.addr = addr + offset;
 		qbuf.len = len - offset;
 		qbuf.skip_unmap = false;
-		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0) {
-			dma_unmap_single(dev->dma_dev, addr, len,
-					 DMA_FROM_DEVICE);
-			skb_free_frag(buf);
-			break;
+
+		if (!mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t)) {
+			frames++;
+			continue;
 		}
-		frames++;
+
+unmap:
+		dma_unmap_single(dev->dma_dev, addr, len, DMA_FROM_DEVICE);
+		skb_free_frag(buf);
+		mt76_put_rxwi(dev, t);
+		break;
 	}
 
 	if (frames)

> -- 
> 2.18.0
> 



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-19  9:55 ` Lorenzo Bianconi
@ 2022-12-20  4:42   ` Sujuan Chen (陈素娟)
  2022-12-21 10:26     ` lorenzo
  0 siblings, 1 reply; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2022-12-20  4:42 UTC (permalink / raw)
  To: Bo Jiao (焦波), lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Mon, 2022-12-19 at 10:55 +0100, Lorenzo Bianconi wrote:
> > From: Bo Jiao <Bo.Jiao@mediatek.com>
> > 
> > fix potential memory leakage, recycle rxwi when mt76_dma_add_buf()
> > call fail.
> > 
> > Signed-off-by: Bo Jiao <Bo.Jiao@mediatek.com>
> > ---
> > v2:
> > - recycle rxwi when page_frag_alloc() and dma_map_single() fail.
> > ---
> >  drivers/net/wireless/mediatek/mt76/dma.c | 27 ++++++++++++++----
> > ------
> >  1 file changed, 16 insertions(+), 11 deletions(-)
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > b/drivers/net/wireless/mediatek/mt76/dma.c
> > index fc24b35..76ad47d 100644
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -580,24 +580,29 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > mt76_queue *q)
> >  
> >  		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > GFP_ATOMIC);
> >  		if (!buf)
> > -			break;
> > +			goto out;
> >  
> >  		addr = dma_map_single(dev->dma_dev, buf, len,
> > DMA_FROM_DEVICE);
> > -		if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
> > -			skb_free_frag(buf);
> > -			break;
> > -		}
> > +		if (unlikely(dma_mapping_error(dev->dma_dev, addr)))
> > +			goto free;
> >  
> >  		qbuf.addr = addr + offset;
> >  		qbuf.len = len - offset;
> >  		qbuf.skip_unmap = false;
> > -		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0)
> > {
> > -			dma_unmap_single(dev->dma_dev, addr, len,
> > -					 DMA_FROM_DEVICE);
> > -			skb_free_frag(buf);
> > -			break;
> > -		}
> > +		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0)
> > +			goto umap;
> > +
> >  		frames++;
> > +		continue;
> > +
> > +umap:
> > +		dma_unmap_single(dev->dma_dev, addr, len,
> > +				 DMA_FROM_DEVICE);
> > +free:
> > +		skb_free_frag(buf);
> > +out:
> > +		mt76_put_rxwi(dev, t);
> > +		break;
> >  	}
> >  
> >  	if (frames)
> 
> Hi Bo,
> 
> I guess in the way below, the code is more readable, what do you
> think?
> 
> Regards,
> Lorenzo
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> b/drivers/net/wireless/mediatek/mt76/dma.c
> index fad5fe19fe18..001538f698f1 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -571,13 +571,6 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>  		struct mt76_queue_buf qbuf;
>  		void *buf = NULL;
>  
> -		if ((q->flags & MT_QFLAG_WED) &&
> -		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) ==
> MT76_WED_Q_RX) {
> -			t = mt76_get_rxwi(dev);
> -			if (!t)
> -				break;
> -		}
> -
>  		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> GFP_ATOMIC);
>  		if (!buf)
>  			break;
> @@ -588,16 +581,27 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>  			break;
>  		}
>  
> +		if ((q->flags & MT_QFLAG_WED) &&
> +		    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) ==
> MT76_WED_Q_RX) {
> +			t = mt76_get_rxwi(dev);
> +			if (!t)
> +				goto unmap;
> +		}
> +
>  		qbuf.addr = addr + offset;
>  		qbuf.len = len - offset;
>  		qbuf.skip_unmap = false;
> -		if (mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t) < 0)
> {
> -			dma_unmap_single(dev->dma_dev, addr, len,
> -					 DMA_FROM_DEVICE);
> -			skb_free_frag(buf);
> -			break;
> +
> +		if (!mt76_dma_add_buf(dev, q, &qbuf, 1, 0, buf, t)) {
> +			frames++;
> +			continue;
>  		}
> -		frames++;
> +
> +unmap:
> +		dma_unmap_single(dev->dma_dev, addr, len,
> DMA_FROM_DEVICE);
> +		skb_free_frag(buf);
> +		mt76_put_rxwi(dev, t);
> +		break;
>  	}
>  
>  	if (frames)
> 
Hi Lore,

We love your patch, but we have another patch that avoids memory
fragmentation by duplicating the rx skb after mt76_dma_dequeue(), and it
requires mt76_get_rxwi() to be placed before page_frag_alloc().

The patch below (it still needs a rebase) will be sent after the current
patch is accepted.

--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -386,9 +386,11 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct mt76_queue *q, int idx,
                                 SKB_WITH_OVERHEAD(q->buf_size),
                                 DMA_FROM_DEVICE);

-               buf = t->ptr;
+               buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+               if (!buf)
+                       return NULL;
+               memcpy(buf, t->ptr, SKB_WITH_OVERHEAD(q->buf_size));
                t->dma_addr = 0;
-               t->ptr = NULL;

                mt76_put_rxwi(dev, t);

@@ -569,6 +571,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
        while (q->queued < q->ndesc - 1) {
                struct mt76_txwi_cache *t = NULL;
                struct mt76_queue_buf qbuf;
+               bool skip_alloc = false;
                void *buf = NULL;

                if ((q->flags & MT_QFLAG_WED) &&
@@ -576,11 +579,19 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
                        t = mt76_get_rxwi(dev);
                        if (!t)
                                break;
+
+                       if (t->ptr) {
+                               skip_alloc = true;
+                               buf = t->ptr;
+                       }
                }

-               buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
-               if (!buf)
-                       break;
+               if (!skip_alloc) {
+                       buf = page_frag_alloc(&q->rx_page, q->buf_size,
+                                             GFP_ATOMIC);
+                       if (!buf)
+                               break;
+               }

                addr = dma_map_single(dev->dma_dev, buf, len, DMA_FROM_DEVICE);
                if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
--
2.18.0

> > -- 
> > 2.18.0
> > 


* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-20  4:42   ` Sujuan Chen (陈素娟)
@ 2022-12-21 10:26     ` lorenzo
  2022-12-21 14:48       ` Sujuan Chen (陈素娟)
  0 siblings, 1 reply; 14+ messages in thread
From: lorenzo @ 2022-12-21 10:26 UTC (permalink / raw)
  To: Sujuan Chen (陈素娟)
  Cc: Bo Jiao (焦波), linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org


[...]

> Hi Lore,

Hi Sujuan,

> 
> we love your patch, but we have another patch to avoid memory
> fragmentation by duplicating the rx skb after mt76_dma_dequeue(). it
> requires mt76_get_rxwi() be placed before page_frag_alloc().
> 
> the below patch(need rebase) will be sent after the current patch is
> accepted.
> 
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -386,9 +386,11 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct
> mt76_queue *q, int idx,
>                                  SKB_WITH_OVERHEAD(q->buf_size),
>                                  DMA_FROM_DEVICE);
> 
> -               buf = t->ptr;
> +               buf = page_frag_alloc(&q->rx_page, q->buf_size,
> GFP_ATOMIC);
> +               if (!buf)
> +                       return NULL;
> +               memcpy(buf, t->ptr, SKB_WITH_OVERHEAD(q->buf_size));

With this approach we still need to allocate the buffer (in mt76_dma_get_buf()
instead of mt76_dma_rx_fill()), and on top of that we need to copy the full
buffer, which can be pretty expensive compared to relying on the DMA, so I
would avoid this approach.

Regards,
Lorenzo

>                 t->dma_addr = 0;
> -               t->ptr = NULL;
> 
>                 mt76_put_rxwi(dev, t);
> 
> @@ -569,6 +571,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>         while (q->queued < q->ndesc - 1) {
>                 struct mt76_txwi_cache *t = NULL;
>                 struct mt76_queue_buf qbuf;
> +               bool skip_alloc = false;
>                 void *buf = NULL;
> 
>                 if ((q->flags & MT_QFLAG_WED) &&
> @@ -576,11 +579,19 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>                         t = mt76_get_rxwi(dev);
>                         if (!t)
>                                 break;
> +
> +                       if (t->ptr) {
> +                               skip_alloc = true;
> +                               buf = t->ptr;
> +                       }
>                 }
> 
> -               buf = page_frag_alloc(&q->rx_page, q->buf_size,
> GFP_ATOMIC);
> -               if (!buf)
> -                       break;
> +               if (!skip_alloc) {
> +                       buf = page_frag_alloc(&q->rx_page, q->buf_size,
> +                                             GFP_ATOMIC);
> +                       if (!buf)
> +                               break;
> +               }
> 
>                 addr = dma_map_single(dev->dma_dev, buf, len,
> DMA_FROM_DEVICE);
>                 if (unlikely(dma_mapping_error(dev->dma_dev, addr))) {
> --
> 2.18.0
> 
> > > -- 
> > > 2.18.0
> > > 



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-21 10:26     ` lorenzo
@ 2022-12-21 14:48       ` Sujuan Chen (陈素娟)
  2022-12-21 15:12         ` Felix Fietkau
  0 siblings, 1 reply; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2022-12-21 14:48 UTC (permalink / raw)
  To: lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞),
	Bo Jiao (焦波), nbd@nbd.name,
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Wed, 2022-12-21 at 11:26 +0100, lorenzo@kernel.org wrote:
> [...]
> 
> > Hi Lore,
> 
> Hi Sujuan,
> 
> > 
> > we love your patch, but we have another patch to avoid memory
> > fragmentation by duplicating the rx skb after mt76_dma_dequeue().
> > it
> > requires mt76_get_rxwi() be placed before page_frag_alloc().
> > 
> > the below patch(need rebase) will be sent after the current patch
> > is
> > accepted.
> > 
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -386,9 +386,11 @@ mt76_dma_get_buf(struct mt76_dev *dev, struct
> > mt76_queue *q, int idx,
> >                                  SKB_WITH_OVERHEAD(q->buf_size),
> >                                  DMA_FROM_DEVICE);
> > 
> > -               buf = t->ptr;
> > +               buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > GFP_ATOMIC);
> > +               if (!buf)
> > +                       return NULL;
> > +               memcpy(buf, t->ptr, SKB_WITH_OVERHEAD(q-
> > >buf_size));
> 
> We this approach we still need to allocate the buffer (in
> mt76_dma_get_buf()
> instead of mt76_dma_rx_fill()) but we need even to copy the full
> buffer that
> can be pretty expensive instead of relying on the DMA, so I would
> avoid this
> approach.
> 

Hi Lore,

Yes, it is quite expensive, but without the memcpy it will cause memory
fragmentation (we hit this issue in internal SQC).

As we know, WED needs to exchange rx packets (belonging to the WED rx buffer
manager) with the wifi driver (DMA rx data queue) by exchanging the WFDMA
DMAD, in order to keep free WED rx buffers available.

It is possible that a large number of buffers have been handed over to WED
and cannot come back to the wlan driver. In that case the memory from the
same 32K page cache cannot be released (the backing page is only freed once
every fragment carved from it has been freed), and page_frag_alloc() in
mt76_dma_rx_fill() starts to fail.

Any ideas other than the memcpy?

Regards,
Sujuan

> Regards,
> Lorenzo
> 
> >                 t->dma_addr = 0;
> > -               t->ptr = NULL;
> > 
> >                 mt76_put_rxwi(dev, t);
> > 
> > @@ -569,6 +571,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > mt76_queue *q)
> >         while (q->queued < q->ndesc - 1) {
> >                 struct mt76_txwi_cache *t = NULL;
> >                 struct mt76_queue_buf qbuf;
> > +               bool skip_alloc = false;
> >                 void *buf = NULL;
> > 
> >                 if ((q->flags & MT_QFLAG_WED) &&
> > @@ -576,11 +579,19 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > mt76_queue *q)
> >                         t = mt76_get_rxwi(dev);
> >                         if (!t)
> >                                 break;
> > +
> > +                       if (t->ptr) {
> > +                               skip_alloc = true;
> > +                               buf = t->ptr;
> > +                       }
> >                 }
> > 
> > -               buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > GFP_ATOMIC);
> > -               if (!buf)
> > -                       break;
> > +               if (!skip_alloc) {
> > +                       buf = page_frag_alloc(&q->rx_page, q-
> > >buf_size,
> > +                                             GFP_ATOMIC);
> > +                       if (!buf)
> > +                               break;
> > +               }
> > 
> >                 addr = dma_map_single(dev->dma_dev, buf, len,
> > DMA_FROM_DEVICE);
> >                 if (unlikely(dma_mapping_error(dev->dma_dev,
> > addr))) {
> > --
> > 2.18.0
> > 
> > > > -- 
> > > > 2.18.0
> > > > 


* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-21 14:48       ` Sujuan Chen (陈素娟)
@ 2022-12-21 15:12         ` Felix Fietkau
  2022-12-21 18:02           ` Lorenzo Bianconi
  0 siblings, 1 reply; 14+ messages in thread
From: Felix Fietkau @ 2022-12-21 15:12 UTC (permalink / raw)
  To: Sujuan Chen
  Cc: lorenzo, linux-wireless, Shayne Chen, Bo Jiao, Evelyn Tsai,
	Ryder Lee, linux-mediatek

Hi Sujuan,

> Yes, it is so expensive, but if no memcopy, it will casue memory
> fragmentation (we hit this issue in internal SQC).
> 
> as we know, wed needs to exchange rx pkt(belongs to wed rx buffer
> manager) with wifi driver(dma rx data queue) by exchanging wfdma dmad
> to ensure the free wed rx buffer.
> 
> it is possiable that a large number of buffer has been exchanged to wed
> and can not come back to wlan driver. So, the memory from the same 32K
> page cache is unable to be released, and it will be failed at
> page_frag_alloc in mt76_dma_rx_fill.
> 
> Any ideas but memcopy?
A simple solution would be to simply allocate single pages, or half-page fragments.

- Felix



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-21 15:12         ` Felix Fietkau
@ 2022-12-21 18:02           ` Lorenzo Bianconi
  2022-12-22  8:51             ` Sujuan Chen (陈素娟)
  2023-01-03  1:55             ` Sujuan Chen (陈素娟)
  0 siblings, 2 replies; 14+ messages in thread
From: Lorenzo Bianconi @ 2022-12-21 18:02 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Sujuan Chen, linux-wireless, Shayne Chen, Bo Jiao, Evelyn Tsai,
	Ryder Lee, linux-mediatek


On Dec 21, Felix Fietkau wrote:
> Hi Sujuan,
> 
> > Yes, it is so expensive, but if no memcopy, it will casue memory
> > fragmentation (we hit this issue in internal SQC).
> > 
> > as we know, wed needs to exchange rx pkt(belongs to wed rx buffer
> > manager) with wifi driver(dma rx data queue) by exchanging wfdma dmad
> > to ensure the free wed rx buffer.
> > 
> > it is possiable that a large number of buffer has been exchanged to wed
> > and can not come back to wlan driver. So, the memory from the same 32K
> > page cache is unable to be released, and it will be failed at
> > page_frag_alloc in mt76_dma_rx_fill.
> > 
> > Any ideas but memcopy?
> A simple solution would be to simply allocate single pages, or half-page fragments.
> 
> - Felix
> 

A simple approach would be allocating a single page for each rx buffer.

@Sujuan: can you please double check whether it is ok from a performance and
	 memory fragmentation point of view? If not, I guess we can try to
	 optimize it and allocate multiple buffers in the same page, tweaking
	 the page refcount.

(this patch must be applied on top of Felix's dma fix).

Regards,
Lorenzo

diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 28a7fe064313..1d9e580977fc 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
 	return ret;
 }
 
+static void *
+mt76_dma_get_rx_buf(struct mt76_queue *q)
+{
+	if ((q->flags & MT_QFLAG_WED) &&
+	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
+		/* WED RX queue */
+		struct page *page = dev_alloc_page();
+
+		return page ? page_address(page) : NULL;
+	}
+
+	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+}
+
 static int
 mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 {
@@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
 		struct mt76_queue_buf qbuf;
 		void *buf = NULL;
 
-		buf = page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
+		buf = mt76_dma_get_rx_buf(q);
 		if (!buf)
 			break;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
index 1a2e4df8d1b5..2924e71e4fbe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
@@ -594,13 +594,9 @@ static void mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
 static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
 {
 	struct mt7915_dev *dev;
-	u32 length;
 	int i;
 
 	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
-	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
-				sizeof(struct skb_shared_info));
-
 	for (i = 0; i < dev->mt76.rx_token_size; i++) {
 		struct mt76_txwi_cache *t;
 
@@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device *wed)
 
 		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
 				 wed->wlan.rx_size, DMA_FROM_DEVICE);
-		__free_pages(virt_to_page(t->ptr), get_order(length));
+		free_page(virt_to_page(t->ptr));
 		t->ptr = NULL;
 
 		mt76_put_rxwi(&dev->mt76, t);
@@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 {
 	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
 	struct mt7915_dev *dev;
-	u32 length;
 	int i;
 
 	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
-	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
-				sizeof(struct skb_shared_info));
-
 	for (i = 0; i < size; i++) {
 		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
 		dma_addr_t phy_addr;
@@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 		int token;
 		void *ptr;
 
-		page = __dev_alloc_pages(GFP_KERNEL, get_order(length));
+		page = __dev_alloc_page(GFP_KERNEL);
 		if (!page)
 			goto unmap;
 
@@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 					  wed->wlan.rx_size,
 					  DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(dev->mt76.dev, phy_addr))) {
-			__free_pages(page, get_order(length));
+			free_page(page);
 			goto unmap;
 		}
 
@@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct mtk_wed_device *wed, int size)
 		if (token < 0) {
 			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
 					 wed->wlan.rx_size, DMA_TO_DEVICE);
-			__free_pages(page, get_order(length));
+			free_page(page);
 			goto unmap;
 		}
 
-- 
2.38.1




* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-21 18:02           ` Lorenzo Bianconi
@ 2022-12-22  8:51             ` Sujuan Chen (陈素娟)
  2022-12-22  9:44               ` lorenzo
  2023-01-03  1:55             ` Sujuan Chen (陈素娟)
  1 sibling, 1 reply; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2022-12-22  8:51 UTC (permalink / raw)
  To: nbd@nbd.name, lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞),
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> On Dec 21, Felix Fietkau wrote:
> > Hi Sujuan,
> > 
> > > Yes, it is so expensive, but if no memcopy, it will casue memory
> > > fragmentation (we hit this issue in internal SQC).
> > > 
> > > as we know, wed needs to exchange rx pkt(belongs to wed rx buffer
> > > manager) with wifi driver(dma rx data queue) by exchanging wfdma
> > > dmad
> > > to ensure the free wed rx buffer.
> > > 
> > > it is possiable that a large number of buffer has been exchanged
> > > to wed
> > > and can not come back to wlan driver. So, the memory from the
> > > same 32K
> > > page cache is unable to be released, and it will be failed at
> > > page_frag_alloc in mt76_dma_rx_fill.
> > > 
> > > Any ideas but memcopy?
> > 
> > A simple solution would be to simply allocate single pages, or
> > half-page fragments.
> > 
> > - Felix
> > 
> 
> A simple approach would be allocating a single page for each rx
> buffer.
> 
> @Sujuan: can you please double check if it is ok from performance and
> memory
> 	 fragmentation point of view? If not I guess we can try to
> optimize it
> 	 and allocate multiple buffers in the same page tweeking page
> refcount.
> 
> (this patch must be applied on top of Felix's dma fix).
> 

Allocating a single page for each rx buffer avoids memory fragmentation,
but it always uses 4K for an rx packet that only needs 2K, right?

I guess performance would be worse without the page cache.
We have tested this on the MTK private driver: a 7% drop in throughput when
using a 4K page cache compared to the 32K page cache, and a 10% drop when
using slab to allocate the buffers.

A single page per rx buffer may cause a throughput drop of over 7% and
waste memory; what do you think?

Regards,
Sujuan

> Regards,
> Lorenzo
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> b/drivers/net/wireless/mediatek/mt76/dma.c
> index 28a7fe064313..1d9e580977fc 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> struct mt76_queue *q,
>  	return ret;
>  }
>  
> +static void *
> +mt76_dma_get_rx_buf(struct mt76_queue *q)
> +{
> +	if ((q->flags & MT_QFLAG_WED) &&
> +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> +		/* WED RX queue */
> +		struct page *page = dev_alloc_page();
> +
> +		return page ? page_address(page) : NULL;
> +	}
> +
> +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> +}
> +
>  static int
>  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  {
> @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>  		struct mt76_queue_buf qbuf;
>  		void *buf = NULL;
>  
> -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> GFP_ATOMIC);
> +		buf = mt76_dma_get_rx_buf(q);
>  		if (!buf)
>  			break;
>  
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> index 1a2e4df8d1b5..2924e71e4fbe 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> @@ -594,13 +594,9 @@ static void
> mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
>  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> *wed)
>  {
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
>  		struct mt76_txwi_cache *t;
>  
> @@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct
> mtk_wed_device *wed)
>  
>  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
>  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> -		__free_pages(virt_to_page(t->ptr), get_order(length));
> +		free_page(virt_to_page(t->ptr));
>  		t->ptr = NULL;
>  
>  		mt76_put_rxwi(&dev->mt76, t);
> @@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  {
>  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < size; i++) {
>  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
>  		dma_addr_t phy_addr;
> @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  		int token;
>  		void *ptr;
>  
> -		page = __dev_alloc_pages(GFP_KERNEL,
> get_order(length));
> +		page = __dev_alloc_page(GFP_KERNEL);
>  		if (!page)
>  			goto unmap;
>  
> @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  					  wed->wlan.rx_size,
>  					  DMA_TO_DEVICE);
>  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> phy_addr))) {
> -			__free_pages(page, get_order(length));
> +			free_page(page);
>  			goto unmap;
>  		}
>  
> @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  		if (token < 0) {
>  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
>  					 wed->wlan.rx_size,
> DMA_TO_DEVICE);
> -			__free_pages(page, get_order(length));
> +			free_page(page);
>  			goto unmap;
>  		}
>  


* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-22  8:51             ` Sujuan Chen (陈素娟)
@ 2022-12-22  9:44               ` lorenzo
  2022-12-22 14:01                 ` Sujuan Chen (陈素娟)
  0 siblings, 1 reply; 14+ messages in thread
From: lorenzo @ 2022-12-22  9:44 UTC (permalink / raw)
  To: Sujuan Chen (陈素娟)
  Cc: nbd@nbd.name, linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞),
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org


> On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > On Dec 21, Felix Fietkau wrote:
> > > Hi Sujuan,
> > > 
> > > > Yes, it is so expensive, but if no memcopy, it will casue memory
> > > > fragmentation (we hit this issue in internal SQC).
> > > > 
> > > > as we know, wed needs to exchange rx pkt(belongs to wed rx buffer
> > > > manager) with wifi driver(dma rx data queue) by exchanging wfdma
> > > > dmad
> > > > to ensure the free wed rx buffer.
> > > > 
> > > > it is possiable that a large number of buffer has been exchanged
> > > > to wed
> > > > and can not come back to wlan driver. So, the memory from the
> > > > same 32K
> > > > page cache is unable to be released, and it will be failed at
> > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > 
> > > > Any ideas but memcopy?
> > > 
> > > A simple solution would be to simply allocate single pages, or
> > > half-page fragments.
> > > 
> > > - Felix
> > > 
> > 
> > A simple approach would be allocating a single page for each rx
> > buffer.
> > 
> > @Sujuan: can you please double check if it is ok from performance and
> > memory
> > 	 fragmentation point of view? If not I guess we can try to
> > optimize it
> > 	 and allocate multiple buffers in the same page tweeking page
> > refcount.
> > 
> > (this patch must be applied on top of Felix's dma fix).
> > 
> 
> Allocating single page for each rx buffer avoids memory fragmentation,
> but it always uses 4K for one rx pkt which only needs 2K, right?

Correct. We can optimize it by allocating multiple buffers (in this case 2,
assuming 4K pages) in a single page and recycling the page.
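
As a rough illustration only (not the actual mt76 code, and ignoring locking
and teardown of a partially used page), carving two 2K rx buffers out of one
4K page and letting the page refcount keep the page alive until both halves
are freed could look like the sketch below; the helper name and the
caller-managed cache/offset are hypothetical:

/* Take one page reference per fragment, so the page is only returned to
 * the allocator once every fragment has been released again via
 * skb_free_frag()/put_page().
 */
static void *rx_alloc_half_page(struct page **cache, unsigned int *offset,
				unsigned int buf_size)
{
	struct page *page = *cache;

	if (!page || *offset + buf_size > PAGE_SIZE) {
		page = dev_alloc_page();
		if (!page)
			return NULL;

		/* the allocation itself holds one reference, add the rest */
		page_ref_add(page, PAGE_SIZE / buf_size - 1);
		*cache = page;
		*offset = 0;
	}

	*offset += buf_size;
	return page_address(page) + *offset - buf_size;
}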

> 
> I guess performance would be worse without page cache.

I think it is a trade-off

> We have tested on the mtk private driver, 7% drop in throughput when
> setting the 4K page cache compared to the 32K page cache.
> and 10% drop when use slab to allocate buffer.

IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or with a private
page_frag_alloc() implementation) and you avoided memory allocation
failures due to fragmentation, but you got a ~7% drop in throughput, correct?
I think this is quite expected, since we need to refill the page cache ~8
times more often.

Setting memory fragmentation aside, have you measured the impact of the
memcpy of a full buffer?

> 
> A single page per rx buffer may cause a throughput drop of over 7% and
> waste memory, what do you think?

Implementing page recycling the way it is done in page_frag_alloc(), we should
get the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to 4K.

Regards,
Lorenzo

> 
> Regards,
> Sujuan
> 
> > Regards,
> > Lorenzo
> > 
> > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > b/drivers/net/wireless/mediatek/mt76/dma.c
> > index 28a7fe064313..1d9e580977fc 100644
> > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> > struct mt76_queue *q,
> >  	return ret;
> >  }
> >  
> > +static void *
> > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > +{
> > +	if ((q->flags & MT_QFLAG_WED) &&
> > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> > +		/* WED RX queue */
> > +		struct page *page = dev_alloc_page();
> > +
> > +		return page ? page_address(page) : NULL;
> > +	}
> > +
> > +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> > +}
> > +
> >  static int
> >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
> >  {
> > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > mt76_queue *q)
> >  		struct mt76_queue_buf qbuf;
> >  		void *buf = NULL;
> >  
> > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > GFP_ATOMIC);
> > +		buf = mt76_dma_get_rx_buf(q);
> >  		if (!buf)
> >  			break;
> >  
> > diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > @@ -594,13 +594,9 @@ static void
> > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> >  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> > *wed)
> >  {
> >  	struct mt7915_dev *dev;
> > -	u32 length;
> >  	int i;
> >  
> >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > -				sizeof(struct skb_shared_info));
> > -
> >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> >  		struct mt76_txwi_cache *t;
> >  
> > @@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct
> > mtk_wed_device *wed)
> >  
> >  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
> >  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> > -		__free_pages(virt_to_page(t->ptr), get_order(length));
> > +		free_page(virt_to_page(t->ptr));
> >  		t->ptr = NULL;
> >  
> >  		mt76_put_rxwi(&dev->mt76, t);
> > @@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  {
> >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> >  	struct mt7915_dev *dev;
> > -	u32 length;
> >  	int i;
> >  
> >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > -				sizeof(struct skb_shared_info));
> > -
> >  	for (i = 0; i < size; i++) {
> >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
> >  		dma_addr_t phy_addr;
> > @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  		int token;
> >  		void *ptr;
> >  
> > -		page = __dev_alloc_pages(GFP_KERNEL,
> > get_order(length));
> > +		page = __dev_alloc_page(GFP_KERNEL);
> >  		if (!page)
> >  			goto unmap;
> >  
> > @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  					  wed->wlan.rx_size,
> >  					  DMA_TO_DEVICE);
> >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > phy_addr))) {
> > -			__free_pages(page, get_order(length));
> > +			free_page(page);
> >  			goto unmap;
> >  		}
> >  
> > @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > mtk_wed_device *wed, int size)
> >  		if (token < 0) {
> >  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
> >  					 wed->wlan.rx_size,
> > DMA_TO_DEVICE);
> > -			__free_pages(page, get_order(length));
> > +			free_page(page);
> >  			goto unmap;
> >  		}
> >  



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-22  9:44               ` lorenzo
@ 2022-12-22 14:01                 ` Sujuan Chen (陈素娟)
  2022-12-22 14:26                   ` lorenzo
  0 siblings, 1 reply; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2022-12-22 14:01 UTC (permalink / raw)
  To: lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Thu, 2022-12-22 at 10:44 +0100, lorenzo@kernel.org wrote:
> > On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > > On Dec 21, Felix Fietkau wrote:
> > > > Hi Sujuan,
> > > > 
> > > > > Yes, it is so expensive, but if no memcopy, it will casue
> > > > > memory
> > > > > fragmentation (we hit this issue in internal SQC).
> > > > > 
> > > > > as we know, wed needs to exchange rx pkt(belongs to wed rx
> > > > > buffer
> > > > > manager) with wifi driver(dma rx data queue) by exchanging
> > > > > wfdma
> > > > > dmad
> > > > > to ensure the free wed rx buffer.
> > > > > 
> > > > > it is possiable that a large number of buffer has been
> > > > > exchanged
> > > > > to wed
> > > > > and can not come back to wlan driver. So, the memory from the
> > > > > same 32K
> > > > > page cache is unable to be released, and it will be failed at
> > > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > > 
> > > > > Any ideas but memcopy?
> > > > 
> > > > A simple solution would be to simply allocate single pages, or
> > > > half-page fragments.
> > > > 
> > > > - Felix
> > > > 
> > > 
> > > A simple approach would be allocating a single page for each rx
> > > buffer.
> > > 
> > > @Sujuan: can you please double check if it is ok from performance
> > > and
> > > memory
> > > 	 fragmentation point of view? If not I guess we can try to
> > > optimize it
> > > 	 and allocate multiple buffers in the same page tweeking page
> > > refcount.
> > > 
> > > (this patch must be applied on top of Felix's dma fix).
> > > 
> > 
> > Allocating single page for each rx buffer avoids memory
> > fragmentation,
> > but it always uses 4K for one rx pkt which only needs 2K, right?
> 
> correct, we can optimize it allocating multiple buffers (in this case
> 2,
> assuming 4K pages) in a single page and recycling the page.
> 
> > 
> > I guess performance would be worse without page cache.
> 
> I think it is a trade-off
> 
> > We have tested on the mtk private driver, 7% drop in throughput
> > when
> > setting the 4K page cache compared to the 32K page cache.
> > and 10% drop when use slab to allocate buffer.
> 
> IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or with a
> private
> page_frag_alloc() implementation) and you avoided memory allocation
> failures due to fragmentation but you got ~ 7% drop in throughput,
> correct?
> I think this is quite expected since we need to refill ~ 8 times more
> the
> page cache.
> 
> Not considering memory fragmentation, have you measured the impact of
> the
> memcpy of a full buffer?
> 

Well, for the pure SW path it is maybe a ~300M throughput drop (not sure) when
using the memcpy (5G/HE80/4x4).
But we do the memcpy only when WED is on; after a few unbinding packets, all
flows are offloaded by hw. It is also a trade-off~

Regards,
Sujuan

> > 
> > A single page per rx buffer may cause a throughput drop of over 7%
> > and
> > waste memory, what do you think?
> 
> Implementing the page recycles as it is done in page_frag_alloc() we
> should get
> the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to 4K.
> 
> Regards,
> Lorenzo
> 
> > 
> > Regards,
> > Sujuan
> > 
> > > Regards,
> > > Lorenzo
> > > 
> > > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > > b/drivers/net/wireless/mediatek/mt76/dma.c
> > > index 28a7fe064313..1d9e580977fc 100644
> > > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> > > struct mt76_queue *q,
> > >  	return ret;
> > >  }
> > >  
> > > +static void *
> > > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > > +{
> > > +	if ((q->flags & MT_QFLAG_WED) &&
> > > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> > > +		/* WED RX queue */
> > > +		struct page *page = dev_alloc_page();
> > > +
> > > +		return page ? page_address(page) : NULL;
> > > +	}
> > > +
> > > +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> > > +}
> > > +
> > >  static int
> > >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
> > >  {
> > > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > > mt76_queue *q)
> > >  		struct mt76_queue_buf qbuf;
> > >  		void *buf = NULL;
> > >  
> > > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > > GFP_ATOMIC);
> > > +		buf = mt76_dma_get_rx_buf(q);
> > >  		if (!buf)
> > >  			break;
> > >  
> > > diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > @@ -594,13 +594,9 @@ static void
> > > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> > >  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> > > *wed)
> > >  {
> > >  	struct mt7915_dev *dev;
> > > -	u32 length;
> > >  	int i;
> > >  
> > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > -				sizeof(struct skb_shared_info));
> > > -
> > >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> > >  		struct mt76_txwi_cache *t;
> > >  
> > > @@ -610,7 +606,7 @@ static void
> > > mt7915_mmio_wed_release_rx_buf(struct
> > > mtk_wed_device *wed)
> > >  
> > >  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
> > >  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> > > -		__free_pages(virt_to_page(t->ptr), get_order(length));
> > > +		free_page(virt_to_page(t->ptr));
> > >  		t->ptr = NULL;
> > >  
> > >  		mt76_put_rxwi(&dev->mt76, t);
> > > @@ -621,13 +617,9 @@ static u32
> > > mt7915_mmio_wed_init_rx_buf(struct
> > > mtk_wed_device *wed, int size)
> > >  {
> > >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> > >  	struct mt7915_dev *dev;
> > > -	u32 length;
> > >  	int i;
> > >  
> > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > -				sizeof(struct skb_shared_info));
> > > -
> > >  	for (i = 0; i < size; i++) {
> > >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
> > >  		dma_addr_t phy_addr;
> > > @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > mtk_wed_device *wed, int size)
> > >  		int token;
> > >  		void *ptr;
> > >  
> > > -		page = __dev_alloc_pages(GFP_KERNEL,
> > > get_order(length));
> > > +		page = __dev_alloc_page(GFP_KERNEL);
> > >  		if (!page)
> > >  			goto unmap;
> > >  
> > > @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > mtk_wed_device *wed, int size)
> > >  					  wed->wlan.rx_size,
> > >  					  DMA_TO_DEVICE);
> > >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > > phy_addr))) {
> > > -			__free_pages(page, get_order(length));
> > > +			free_page(page);
> > >  			goto unmap;
> > >  		}
> > >  
> > > @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > mtk_wed_device *wed, int size)
> > >  		if (token < 0) {
> > >  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
> > >  					 wed->wlan.rx_size,
> > > DMA_TO_DEVICE);
> > > -			__free_pages(page, get_order(length));
> > > +			free_page(page);
> > >  			goto unmap;
> > >  		}
> > >  


* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-22 14:01                 ` Sujuan Chen (陈素娟)
@ 2022-12-22 14:26                   ` lorenzo
  2022-12-22 23:57                     ` lorenzo
  0 siblings, 1 reply; 14+ messages in thread
From: lorenzo @ 2022-12-22 14:26 UTC (permalink / raw)
  To: Sujuan Chen (陈素娟)
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org


> On Thu, 2022-12-22 at 10:44 +0100, lorenzo@kernel.org wrote:
> > > On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > > > On Dec 21, Felix Fietkau wrote:
> > > > > Hi Sujuan,
> > > > > 
> > > > > > Yes, it is so expensive, but if no memcopy, it will casue
> > > > > > memory
> > > > > > fragmentation (we hit this issue in internal SQC).
> > > > > > 
> > > > > > as we know, wed needs to exchange rx pkt(belongs to wed rx
> > > > > > buffer
> > > > > > manager) with wifi driver(dma rx data queue) by exchanging
> > > > > > wfdma
> > > > > > dmad
> > > > > > to ensure the free wed rx buffer.
> > > > > > 
> > > > > > it is possiable that a large number of buffer has been
> > > > > > exchanged
> > > > > > to wed
> > > > > > and can not come back to wlan driver. So, the memory from the
> > > > > > same 32K
> > > > > > page cache is unable to be released, and it will be failed at
> > > > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > > > 
> > > > > > Any ideas but memcopy?
> > > > > 
> > > > > A simple solution would be to simply allocate single pages, or
> > > > > half-page fragments.
> > > > > 
> > > > > - Felix
> > > > > 
> > > > 
> > > > A simple approach would be allocating a single page for each rx
> > > > buffer.
> > > > 
> > > > @Sujuan: can you please double check if it is ok from performance
> > > > and
> > > > memory
> > > > 	 fragmentation point of view? If not I guess we can try to
> > > > optimize it
> > > > 	 and allocate multiple buffers in the same page tweeking page
> > > > refcount.
> > > > 
> > > > (this patch must be applied on top of Felix's dma fix).
> > > > 
> > > 
> > > Allocating single page for each rx buffer avoids memory
> > > fragmentation,
> > > but it always uses 4K for one rx pkt which only needs 2K, right?
> > 
> > correct, we can optimize it allocating multiple buffers (in this case
> > 2,
> > assuming 4K pages) in a single page and recycling the page.
> > 
> > > 
> > > I guess performance would be worse without page cache.
> > 
> > I think it is a trade-off
> > 
> > > We have tested on the mtk private driver, 7% drop in throughput
> > > when
> > > setting the 4K page cache compared to the 32K page cache.
> > > and 10% drop when use slab to allocate buffer.
> > 
> > IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or with a
> > private
> > page_frag_alloc() implementation) and you avoided memory allocation
> > failures due to fragmentation but you got ~ 7% drop in throughput,
> > correct?
> > I think this is quite expected since we need to refill ~ 8 times more
> > the
> > page cache.
> > 
> > Not considering memory fragmentation, have you measured the impact of
> > the
> > memcpy of a full buffer?
> > 
> 
> well, for pure sw path, it maybe ~300M drop (not sure) when using
> memcpy(5G/HE80/4*4).
> but we do memcpy only when wed on, atfer a few unbinding pkts, then all
> flows are offloaded by hw. it is also a trade-off~

I guess we have quite a big impact if we enable WED and we do not actually
offload any flow (or we do not enable the hw flowtable in netfilter), don't we?

Regards,
Lorenzo

> 
> Regards,
> Sujuan
> 
> > > 
> > > A single page per rx buffer may cause a throughput drop of over 7%
> > > and
> > > waste memory, what do you think?
> > 
> > Implementing the page recycles as it is done in page_frag_alloc() we
> > should get
> > the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to 4K.
> > 
> > Regards,
> > Lorenzo
> > 
> > > 
> > > Regards,
> > > Sujuan
> > > 
> > > > Regards,
> > > > Lorenzo
> > > > 
> > > > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > index 28a7fe064313..1d9e580977fc 100644
> > > > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> > > > struct mt76_queue *q,
> > > >  	return ret;
> > > >  }
> > > >  
> > > > +static void *
> > > > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > > > +{
> > > > +	if ((q->flags & MT_QFLAG_WED) &&
> > > > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> > > > +		/* WED RX queue */
> > > > +		struct page *page = dev_alloc_page();
> > > > +
> > > > +		return page ? page_address(page) : NULL;
> > > > +	}
> > > > +
> > > > +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> > > > +}
> > > > +
> > > >  static int
> > > >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
> > > >  {
> > > > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > > > mt76_queue *q)
> > > >  		struct mt76_queue_buf qbuf;
> > > >  		void *buf = NULL;
> > > >  
> > > > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > > > GFP_ATOMIC);
> > > > +		buf = mt76_dma_get_rx_buf(q);
> > > >  		if (!buf)
> > > >  			break;
> > > >  
> > > > diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > > > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > @@ -594,13 +594,9 @@ static void
> > > > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> > > >  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> > > > *wed)
> > > >  {
> > > >  	struct mt7915_dev *dev;
> > > > -	u32 length;
> > > >  	int i;
> > > >  
> > > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > > -				sizeof(struct skb_shared_info));
> > > > -
> > > >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> > > >  		struct mt76_txwi_cache *t;
> > > >  
> > > > @@ -610,7 +606,7 @@ static void
> > > > mt7915_mmio_wed_release_rx_buf(struct
> > > > mtk_wed_device *wed)
> > > >  
> > > >  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
> > > >  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> > > > -		__free_pages(virt_to_page(t->ptr), get_order(length));
> > > > +		free_page(virt_to_page(t->ptr));
> > > >  		t->ptr = NULL;
> > > >  
> > > >  		mt76_put_rxwi(&dev->mt76, t);
> > > > @@ -621,13 +617,9 @@ static u32
> > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > mtk_wed_device *wed, int size)
> > > >  {
> > > >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> > > >  	struct mt7915_dev *dev;
> > > > -	u32 length;
> > > >  	int i;
> > > >  
> > > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > > -				sizeof(struct skb_shared_info));
> > > > -
> > > >  	for (i = 0; i < size; i++) {
> > > >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
> > > >  		dma_addr_t phy_addr;
> > > > @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > mtk_wed_device *wed, int size)
> > > >  		int token;
> > > >  		void *ptr;
> > > >  
> > > > -		page = __dev_alloc_pages(GFP_KERNEL,
> > > > get_order(length));
> > > > +		page = __dev_alloc_page(GFP_KERNEL);
> > > >  		if (!page)
> > > >  			goto unmap;
> > > >  
> > > > @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > mtk_wed_device *wed, int size)
> > > >  					  wed->wlan.rx_size,
> > > >  					  DMA_TO_DEVICE);
> > > >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > > > phy_addr))) {
> > > > -			__free_pages(page, get_order(length));
> > > > +			free_page(page);
> > > >  			goto unmap;
> > > >  		}
> > > >  
> > > > @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > mtk_wed_device *wed, int size)
> > > >  		if (token < 0) {
> > > >  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
> > > >  					 wed->wlan.rx_size,
> > > > DMA_TO_DEVICE);
> > > > -			__free_pages(page, get_order(length));
> > > > +			free_page(page);
> > > >  			goto unmap;
> > > >  		}
> > > >  



* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-22 14:26                   ` lorenzo
@ 2022-12-22 23:57                     ` lorenzo
  2023-01-06  4:04                       ` Sujuan Chen (陈素娟)
  0 siblings, 1 reply; 14+ messages in thread
From: lorenzo @ 2022-12-22 23:57 UTC (permalink / raw)
  To: Sujuan Chen (陈素娟)
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org


> > On Thu, 2022-12-22 at 10:44 +0100, lorenzo@kernel.org wrote:
> > > > On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > > > > On Dec 21, Felix Fietkau wrote:
> > > > > > Hi Sujuan,
> > > > > > 
> > > > > > > Yes, it is so expensive, but if no memcopy, it will casue
> > > > > > > memory
> > > > > > > fragmentation (we hit this issue in internal SQC).
> > > > > > > 
> > > > > > > as we know, wed needs to exchange rx pkt(belongs to wed rx
> > > > > > > buffer
> > > > > > > manager) with wifi driver(dma rx data queue) by exchanging
> > > > > > > wfdma
> > > > > > > dmad
> > > > > > > to ensure the free wed rx buffer.
> > > > > > > 
> > > > > > > it is possiable that a large number of buffer has been
> > > > > > > exchanged
> > > > > > > to wed
> > > > > > > and can not come back to wlan driver. So, the memory from the
> > > > > > > same 32K
> > > > > > > > page cache is unable to be released, and it will fail at
> > > > > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > > > > 
> > > > > > > Any ideas but memcopy?
> > > > > > 
> > > > > > A simple solution would be to simply allocate single pages, or
> > > > > > half-page fragments.
> > > > > > 
> > > > > > - Felix
> > > > > > 
> > > > > 
> > > > > A simple approach would be allocating a single page for each rx
> > > > > buffer.
> > > > > 
> > > > > @Sujuan: can you please double check if it is ok from performance
> > > > > and
> > > > > memory
> > > > > 	 fragmentation point of view? If not I guess we can try to
> > > > > optimize it
> > > > > 	 and allocate multiple buffers in the same page tweaking page
> > > > > refcount.
> > > > > 
> > > > > (this patch must be applied on top of Felix's dma fix).
> > > > > 
> > > > 
> > > > Allocating single page for each rx buffer avoids memory
> > > > fragmentation,
> > > > but it always uses 4K for one rx pkt which only needs 2K, right?
> > > 
> > > correct, we can optimize it allocating multiple buffers (in this case
> > > 2,
> > > assuming 4K pages) in a single page and recycling the page.
> > > 
> > > > 
> > > > I guess performance would be worse without page cache.
> > > 
> > > I think it is a trade-off
> > > 
> > > > We have tested on the mtk private driver, 7% drop in throughput
> > > > when
> > > > setting the 4K page cache compared to the 32K page cache.
> > > > and 10% drop when use slab to allocate buffer.
> > > 
> > > IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or with a
> > > private
> > > page_frag_alloc() implementation) and you avoided memory allocation
> > > failures due to fragmentation but you got ~ 7% drop in throughput,
> > > correct?
> > > I think this is quite expected since we need to refill ~ 8 times more
> > > the
> > > page cache.
> > > 
> > > Not considering memory fragmentation, have you measured the impact of
> > > the
> > > memcpy of a full buffer?
> > > 
> > 
> > well, for the pure sw path, it may be a ~300M drop (not sure) when using
> > memcpy (5G/HE80/4*4).
> > but we do the memcpy only when wed is on; after a few unbinding pkts, all
> > flows are offloaded by hw. it is also a trade-off~
> 
> I guess we have quite a big impact if we enable wed and we do not actually
> offload any flow (or we do not enable hw flowtable in netfilter), don't we?

Hi Sujuan,

here ([0],[1]) I switched mt76 to the page_pool allocator [2]. page_pool allows
us to allocate pages of a requested order (order 0 for mt76) and split them
between multiple buffers (2 buffers per page for mt7915). Moreover, the pp
allocator recycles the pages in order to avoid the page allocator overhead and
keeps the pages DMA mapped if requested (I implemented the support for mt7915).
The code is stable (with and without WED) but I have not run any performance
tests. Can you please help with them to see if this code helps solve the
discussed issue?
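
For reference, here is a minimal sketch of what the refill path looks like
on top of page_pool (the helper names and the exact pp_params below are only
illustrative, they are not the actual code in [0]/[1]):

#include <net/page_pool.h>

/* one pool per rx queue: order-0 pages, DMA mapped/synced by the pool and
 * recycled, each page split into buf_size fragments via the frag API
 */
static struct page_pool *mt76_rx_pool_create(struct device *dev, int ring_size)
{
	struct page_pool_params pp = {
		.order		= 0,
		.flags		= PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV |
				  PP_FLAG_PAGE_FRAG,
		.pool_size	= ring_size,
		.nid		= NUMA_NO_NODE,
		.dev		= dev,
		.dma_dir	= DMA_FROM_DEVICE,
		.max_len	= PAGE_SIZE,
	};

	/* returns ERR_PTR() on failure */
	return page_pool_create(&pp);
}

/* refill path: hand out a page fragment instead of page_frag_alloc() */
static void *mt76_rx_get_buf(struct page_pool *pool, unsigned int buf_size,
			     dma_addr_t *addr)
{
	unsigned int offset;
	struct page *page;

	page = page_pool_dev_alloc_frag(pool, &offset, buf_size);
	if (!page)
		return NULL;

	*addr = page_pool_get_dma_addr(page) + offset;
	return page_address(page) + offset;
}

Buffers then go back through page_pool_put_full_page() (or are recycled
directly from the skb path via skb_mark_for_recycle()) instead of
skb_free_frag(), so the pages keep circulating inside the pool.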

Regards,
Lorenzo

[0] https://github.com/LorenzoBianconi/wireless-drivers-next/commit/61ce8e9be04b40fa8a1c0f2778d266982be32d7a
[1] https://github.com/LorenzoBianconi/wireless-drivers-next/commit/f54eefce06d389b155574092d246b21631e42d47
[2] https://docs.kernel.org/networking/page_pool.html

> 
> Regards,
> Lorenzo
> 
> > 
> > Regards,
> > Sujuan
> > 
> > > > 
> > > > A single page per rx buffer may cause a throughput drop of over 7%
> > > > and
> > > > waste memory, what do you think?
> > > 
> > > Implementing the page recycling as it is done in page_frag_alloc() we
> > > should get
> > > the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to 4K.
> > > 
> > > Regards,
> > > Lorenzo
> > > 
> > > > 
> > > > Regards,
> > > > Sujuan
> > > > 
> > > > > Regards,
> > > > > Lorenzo
> > > > > 
> > > > > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > index 28a7fe064313..1d9e580977fc 100644
> > > > > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> > > > > struct mt76_queue *q,
> > > > >  	return ret;
> > > > >  }
> > > > >  
> > > > > +static void *
> > > > > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > > > > +{
> > > > > +	if ((q->flags & MT_QFLAG_WED) &&
> > > > > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> > > > > +		/* WED RX queue */
> > > > > +		struct page *page = dev_alloc_page();
> > > > > +
> > > > > +		return page ? page_address(page) : NULL;
> > > > > +	}
> > > > > +
> > > > > +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> > > > > +}
> > > > > +
> > > > >  static int
> > > > >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
> > > > >  {
> > > > > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> > > > > mt76_queue *q)
> > > > >  		struct mt76_queue_buf qbuf;
> > > > >  		void *buf = NULL;
> > > > >  
> > > > > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > > > > GFP_ATOMIC);
> > > > > +		buf = mt76_dma_get_rx_buf(q);
> > > > >  		if (!buf)
> > > > >  			break;
> > > > >  
> > > > > diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > > > > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > @@ -594,13 +594,9 @@ static void
> > > > > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> > > > >  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> > > > > *wed)
> > > > >  {
> > > > >  	struct mt7915_dev *dev;
> > > > > -	u32 length;
> > > > >  	int i;
> > > > >  
> > > > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > > > -				sizeof(struct skb_shared_info));
> > > > > -
> > > > >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> > > > >  		struct mt76_txwi_cache *t;
> > > > >  
> > > > > @@ -610,7 +606,7 @@ static void
> > > > > mt7915_mmio_wed_release_rx_buf(struct
> > > > > mtk_wed_device *wed)
> > > > >  
> > > > >  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
> > > > >  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> > > > > -		__free_pages(virt_to_page(t->ptr), get_order(length));
> > > > > +		__free_page(virt_to_page(t->ptr));
> > > > >  		t->ptr = NULL;
> > > > >  
> > > > >  		mt76_put_rxwi(&dev->mt76, t);
> > > > > @@ -621,13 +617,9 @@ static u32
> > > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > > mtk_wed_device *wed, int size)
> > > > >  {
> > > > >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> > > > >  	struct mt7915_dev *dev;
> > > > > -	u32 length;
> > > > >  	int i;
> > > > >  
> > > > >  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> > > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> > > > > -				sizeof(struct skb_shared_info));
> > > > > -
> > > > >  	for (i = 0; i < size; i++) {
> > > > >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
> > > > >  		dma_addr_t phy_addr;
> > > > > @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > > mtk_wed_device *wed, int size)
> > > > >  		int token;
> > > > >  		void *ptr;
> > > > >  
> > > > > -		page = __dev_alloc_pages(GFP_KERNEL,
> > > > > get_order(length));
> > > > > +		page = __dev_alloc_page(GFP_KERNEL);
> > > > >  		if (!page)
> > > > >  			goto unmap;
> > > > >  
> > > > > @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > > mtk_wed_device *wed, int size)
> > > > >  					  wed->wlan.rx_size,
> > > > >  					  DMA_TO_DEVICE);
> > > > >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > > > > phy_addr))) {
> > > > > -			__free_pages(page, get_order(length));
> > > > > +			__free_page(page);
> > > > >  			goto unmap;
> > > > >  		}
> > > > >  
> > > > > @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> > > > > mtk_wed_device *wed, int size)
> > > > >  		if (token < 0) {
> > > > >  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
> > > > >  					 wed->wlan.rx_size,
> > > > > DMA_TO_DEVICE);
> > > > > -			__free_pages(page, get_order(length));
> > > > > +			__free_page(page);
> > > > >  			goto unmap;
> > > > >  		}
> > > > >  



[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 228 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-21 18:02           ` Lorenzo Bianconi
  2022-12-22  8:51             ` Sujuan Chen (陈素娟)
@ 2023-01-03  1:55             ` Sujuan Chen (陈素娟)
  1 sibling, 0 replies; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2023-01-03  1:55 UTC (permalink / raw)
  To: nbd@nbd.name, lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞),
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> On Dec 21, Felix Fietkau wrote:
> > Hi Sujuan,
> > 
> > > Yes, it is so expensive, but if no memcpy, it will cause memory
> > > fragmentation (we hit this issue in internal SQC).
> > > 
> > > as we know, wed needs to exchange rx pkts (belonging to the wed rx buffer
> > > manager) with the wifi driver (dma rx data queue) by exchanging wfdma
> > > dmad
> > > to ensure free wed rx buffers.
> > > 
> > > it is possible that a large number of buffers have been exchanged
> > > to wed
> > > and cannot come back to the wlan driver. So, the memory from the
> > > same 32K
> > > page cache is unable to be released, and it will fail at
> > > page_frag_alloc in mt76_dma_rx_fill.
> > > 
> > > Any ideas other than memcpy?
> > 
> > A simple solution would be to simply allocate single pages, or
> > half-page fragments.
> > 
> > - Felix
> > 
> 
> A simple approach would be allocating a single page for each rx
> buffer.
> 
> @Sujuan: can you please double check if it is ok from a performance and
> memory
> 	 fragmentation point of view? If not I guess we can try to
> optimize it
> 	 and allocate multiple buffers in the same page tweaking page
> refcount.
> 
> (this patch must be applied on top of Felix's dma fix).
> 

Allocating a single page for each rx buffer avoids memory fragmentation,
but it always uses 4K for one rx pkt which only needs 2K, right?

I guess performance would be worse without the page cache.
We have tested on the mtk private driver: a 7% drop in throughput when
setting a 4K page cache compared to the 32K page cache,
and a 10% drop when using slab to allocate the buffers.

A single page per rx buffer may cause a throughput drop of over 7% and
waste memory; what do you think?
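
Just to make the alternative being discussed concrete, a rough sketch of
"two buffers per 4K page with a tweaked refcount" could look like this
(rx_half_page/rx_half_used are made-up fields, not existing mt76 members,
and it assumes the rx buf_size fits in half a page):

static void *mt76_rx_alloc_half_page(struct mt76_queue *q)
{
	void *buf;

	if (!q->rx_half_page) {
		q->rx_half_page = dev_alloc_page();	/* order-0 page */
		if (!q->rx_half_page)
			return NULL;
		q->rx_half_used = 0;
	}

	/* one page reference per handed-out buffer */
	get_page(q->rx_half_page);
	buf = page_address(q->rx_half_page) + q->rx_half_used * (PAGE_SIZE / 2);

	if (++q->rx_half_used == 2) {
		/* drop the allocation reference once fully handed out */
		put_page(q->rx_half_page);
		q->rx_half_page = NULL;
	}

	return buf;
}

Each half is then released with skb_free_frag()/put_page() as usual and the
page only goes back to the allocator when both halves are freed, so the
per-packet footprint stays at 2K without relying on the 32K frag cache.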

Regards,
Sujuan

> Regards,
> Lorenzo
> 
> diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> b/drivers/net/wireless/mediatek/mt76/dma.c
> index 28a7fe064313..1d9e580977fc 100644
> --- a/drivers/net/wireless/mediatek/mt76/dma.c
> +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev,
> struct mt76_queue *q,
>  	return ret;
>  }
>  
> +static void *
> +mt76_dma_get_rx_buf(struct mt76_queue *q)
> +{
> +	if ((q->flags & MT_QFLAG_WED) &&
> +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) == MT76_WED_Q_RX) {
> +		/* WED RX queue */
> +		struct page *page = dev_alloc_page();
> +
> +		return page ? page_address(page) : NULL;
> +	}
> +
> +	return page_frag_alloc(&q->rx_page, q->buf_size, GFP_ATOMIC);
> +}
> +
>  static int
>  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue *q)
>  {
> @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev, struct
> mt76_queue *q)
>  		struct mt76_queue_buf qbuf;
>  		void *buf = NULL;
>  
> -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> GFP_ATOMIC);
> +		buf = mt76_dma_get_rx_buf(q);
>  		if (!buf)
>  			break;
>  
> diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> index 1a2e4df8d1b5..2924e71e4fbe 100644
> --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> @@ -594,13 +594,9 @@ static void
> mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
>  static void mt7915_mmio_wed_release_rx_buf(struct mtk_wed_device
> *wed)
>  {
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
>  		struct mt76_txwi_cache *t;
>  
> @@ -610,7 +606,7 @@ static void mt7915_mmio_wed_release_rx_buf(struct
> mtk_wed_device *wed)
>  
>  		dma_unmap_single(dev->mt76.dma_dev, t->dma_addr,
>  				 wed->wlan.rx_size, DMA_FROM_DEVICE);
> -		__free_pages(virt_to_page(t->ptr), get_order(length));
> +		__free_page(virt_to_page(t->ptr));
>  		t->ptr = NULL;
>  
>  		mt76_put_rxwi(&dev->mt76, t);
> @@ -621,13 +617,9 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  {
>  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
>  	struct mt7915_dev *dev;
> -	u32 length;
>  	int i;
>  
>  	dev = container_of(wed, struct mt7915_dev, mt76.mmio.wed);
> -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size +
> -				sizeof(struct skb_shared_info));
> -
>  	for (i = 0; i < size; i++) {
>  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev->mt76);
>  		dma_addr_t phy_addr;
> @@ -635,7 +627,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  		int token;
>  		void *ptr;
>  
> -		page = __dev_alloc_pages(GFP_KERNEL,
> get_order(length));
> +		page = __dev_alloc_page(GFP_KERNEL);
>  		if (!page)
>  			goto unmap;
>  
> @@ -644,7 +636,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  					  wed->wlan.rx_size,
>  					  DMA_TO_DEVICE);
>  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> phy_addr))) {
> -			__free_pages(page, get_order(length));
> +			__free_page(page);
>  			goto unmap;
>  		}
>  
> @@ -653,7 +645,7 @@ static u32 mt7915_mmio_wed_init_rx_buf(struct
> mtk_wed_device *wed, int size)
>  		if (token < 0) {
>  			dma_unmap_single(dev->mt76.dma_dev, phy_addr,
>  					 wed->wlan.rx_size,
> DMA_TO_DEVICE);
> -			__free_pages(page, get_order(length));
> +			__free_page(page);
>  			goto unmap;
>  		}
>  

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] wifi: mt76: fix potential memory leakage
  2022-12-22 23:57                     ` lorenzo
@ 2023-01-06  4:04                       ` Sujuan Chen (陈素娟)
  0 siblings, 0 replies; 14+ messages in thread
From: Sujuan Chen (陈素娟) @ 2023-01-06  4:04 UTC (permalink / raw)
  To: lorenzo@kernel.org
  Cc: linux-wireless@vger.kernel.org,
	Shayne Chen (陳軒丞), nbd@nbd.name,
	Bo Jiao (焦波),
	Evelyn Tsai (蔡珊鈺), Ryder Lee,
	linux-mediatek@lists.infradead.org

On Fri, 2022-12-23 at 00:57 +0100, lorenzo@kernel.org wrote:
> > > On Thu, 2022-12-22 at 10:44 +0100, lorenzo@kernel.org wrote:
> > > > > On Wed, 2022-12-21 at 19:02 +0100, Lorenzo Bianconi wrote:
> > > > > > On Dec 21, Felix Fietkau wrote:
> > > > > > > Hi Sujuan,
> > > > > > > 
> > > > > > > > Yes, it is so expensive, but if no memcopy, it will
> > > > > > > > casue
> > > > > > > > memory
> > > > > > > > fragmentation (we hit this issue in internal SQC).
> > > > > > > > 
> > > > > > > > as we know, wed needs to exchange rx pkt(belongs to wed
> > > > > > > > rx
> > > > > > > > buffer
> > > > > > > > manager) with wifi driver(dma rx data queue) by
> > > > > > > > exchanging
> > > > > > > > wfdma
> > > > > > > > dmad
> > > > > > > > to ensure the free wed rx buffer.
> > > > > > > > 
> > > > > > > > it is possiable that a large number of buffer has been
> > > > > > > > exchanged
> > > > > > > > to wed
> > > > > > > > and can not come back to wlan driver. So, the memory
> > > > > > > > from the
> > > > > > > > same 32K
> > > > > > > > page cache is unable to be released, and it will be
> > > > > > > > failed at
> > > > > > > > page_frag_alloc in mt76_dma_rx_fill.
> > > > > > > > 
> > > > > > > > Any ideas but memcopy?
> > > > > > > 
> > > > > > > A simple solution would be to simply allocate single
> > > > > > > pages, or
> > > > > > > half-page fragments.
> > > > > > > 
> > > > > > > - Felix
> > > > > > > 
> > > > > > 
> > > > > > A simple approach would be allocating a single page for
> > > > > > each rx
> > > > > > buffer.
> > > > > > 
> > > > > > @Sujuan: can you please double check if it is ok from
> > > > > > performance
> > > > > > and
> > > > > > memory
> > > > > > 	 fragmentation point of view? If not I guess we can try
> > > > > > to
> > > > > > optimize it
> > > > > > 	 and allocate multiple buffers in the same page
> > > > > > tweaking page
> > > > > > refcount.
> > > > > > 
> > > > > > (this patch must be applied on top of Felix's dma fix).
> > > > > > 
> > > > > 
> > > > > Allocating single page for each rx buffer avoids memory
> > > > > fragmentation,
> > > > > but it always uses 4K for one rx pkt which only needs 2K,
> > > > > right?
> > > > 
> > > > correct, we can optimize it allocating multiple buffers (in
> > > > this case
> > > > 2,
> > > > assuming 4K pages) in a single page and recycling the page.
> > > > 
> > > > > 
> > > > > I guess performance would be worse without page cache.
> > > > 
> > > > I think it is a trade-off
> > > > 
> > > > > We have tested on the mtk private driver, 7% drop in
> > > > > throughput
> > > > > when
> > > > > setting the 4K page cache compared to the 32K page cache.
> > > > > and 10% drop when use slab to allocate buffer.
> > > > 
> > > > IIUC you tested with PAGE_FRAG_CACHE_MAX_SIZE set to 4K (or
> > > > with a
> > > > private
> > > > page_frag_alloc() implementation) and you avoided memory
> > > > allocation
> > > > failures due to fragmentation but you got ~ 7% drop in
> > > > throughput,
> > > > correct?
> > > > I think this is quite expected since we need to refill ~ 8
> > > > times more
> > > > the
> > > > page cache.
> > > > 
> > > > Not considering memory fragmentation, have you measured the
> > > > impact of
> > > > the
> > > > memcpy of a full buffer?
> > > > 
> > > 
> > > well, for the pure sw path, it may be a ~300M drop (not sure) when using
> > > memcpy (5G/HE80/4*4).
> > > but we do the memcpy only when wed is on; after a few unbinding pkts,
> > > all
> > > flows are offloaded by hw. it is also a trade-off~
> > 
> > I guess we have quite a big impact if we enable wed and we do not
> > actually
> > offload any flow (or we do not enable hw flowtable in netfilter),
> > don't we?
> 
> Hi Sujuan,
> 
> here ([0],[1]) I switched mt76 to page_pool allocator [2]. page_pool
> allows us to
> allocate pages of a requested order (order 0 for mt76) and split them
> between
> multiple buffers (2 buffers per page for mt7915). Moreover pp
> allocator recycles
> the pages in order to avoid the page allocator overhead and keep the
> pages DMA mapped
> if requested (I implemented the support for mt7915).
> The code is stable (with and without WED) but I have not run any
> performance
> tests. Can you please help with them to see if this code helps
> solving the
> discussed issue?
> 

Hi Lore,


I tried to apply your patches to my codebase (kernel 5.x). However,
since the page_pool API from kernel 6.x does not exist in kernel
5.x, I cannot build a proper image for testing (even though we
applied 30+ page_pool patches).

Sorry for not helping you.

Regards,
Sujuan

> Regards,
> Lorenzo
> 
> [0] 
> https://github.com/LorenzoBianconi/wireless-drivers-next/commit/61ce8e9be04b40fa8a1c0f2778d266982be32d7a
> [1] 
> https://github.com/LorenzoBianconi/wireless-drivers-next/commit/f54eefce06d389b155574092d246b21631e42d47
> [2] https://docs.kernel.org/networking/page_pool.html
> 
> > 
> > Regards,
> > Lorenzo
> > 
> > > 
> > > Regards,
> > > Sujuan
> > > 
> > > > > 
> > > > > A single page per rx buffer may cause a throughput drop of
> > > > > over 7%
> > > > > and
> > > > > waste memory, what do you think?
> > > > 
> > > > Implementing the page recycling as it is done in
> > > > page_frag_alloc() we
> > > > should get
> > > > the same results you got with PAGE_FRAG_CACHE_MAX_SIZE set to
> > > > 4K.
> > > > 
> > > > Regards,
> > > > Lorenzo
> > > > 
> > > > > 
> > > > > Regards,
> > > > > Sujuan
> > > > > 
> > > > > > Regards,
> > > > > > Lorenzo
> > > > > > 
> > > > > > diff --git a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > > b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > > index 28a7fe064313..1d9e580977fc 100644
> > > > > > --- a/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > > +++ b/drivers/net/wireless/mediatek/mt76/dma.c
> > > > > > @@ -580,6 +580,20 @@ mt76_dma_tx_queue_skb(struct mt76_dev
> > > > > > *dev,
> > > > > > struct mt76_queue *q,
> > > > > >  	return ret;
> > > > > >  }
> > > > > >  
> > > > > > +static void *
> > > > > > +mt76_dma_get_rx_buf(struct mt76_queue *q)
> > > > > > +{
> > > > > > +	if ((q->flags & MT_QFLAG_WED) &&
> > > > > > +	    FIELD_GET(MT_QFLAG_WED_TYPE, q->flags) ==
> > > > > > MT76_WED_Q_RX) {
> > > > > > +		/* WED RX queue */
> > > > > > +		struct page *page = dev_alloc_page();
> > > > > > +
> > > > > > +		return page ? page_address(page) : NULL;
> > > > > > +	}
> > > > > > +
> > > > > > +	return page_frag_alloc(&q->rx_page, q->buf_size,
> > > > > > GFP_ATOMIC);
> > > > > > +}
> > > > > > +
> > > > > >  static int
> > > > > >  mt76_dma_rx_fill(struct mt76_dev *dev, struct mt76_queue
> > > > > > *q)
> > > > > >  {
> > > > > > @@ -596,7 +610,7 @@ mt76_dma_rx_fill(struct mt76_dev *dev,
> > > > > > struct
> > > > > > mt76_queue *q)
> > > > > >  		struct mt76_queue_buf qbuf;
> > > > > >  		void *buf = NULL;
> > > > > >  
> > > > > > -		buf = page_frag_alloc(&q->rx_page, q->buf_size,
> > > > > > GFP_ATOMIC);
> > > > > > +		buf = mt76_dma_get_rx_buf(q);
> > > > > >  		if (!buf)
> > > > > >  			break;
> > > > > >  
> > > > > > diff --git
> > > > > > a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > > b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > > index 1a2e4df8d1b5..2924e71e4fbe 100644
> > > > > > --- a/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > > +++ b/drivers/net/wireless/mediatek/mt76/mt7915/mmio.c
> > > > > > @@ -594,13 +594,9 @@ static void
> > > > > > mt7915_mmio_wed_offload_disable(struct mtk_wed_device *wed)
> > > > > >  static void mt7915_mmio_wed_release_rx_buf(struct
> > > > > > mtk_wed_device
> > > > > > *wed)
> > > > > >  {
> > > > > >  	struct mt7915_dev *dev;
> > > > > > -	u32 length;
> > > > > >  	int i;
> > > > > >  
> > > > > >  	dev = container_of(wed, struct mt7915_dev,
> > > > > > mt76.mmio.wed);
> > > > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size 
> > > > > > +
> > > > > > -				sizeof(struct
> > > > > > skb_shared_info));
> > > > > > -
> > > > > >  	for (i = 0; i < dev->mt76.rx_token_size; i++) {
> > > > > >  		struct mt76_txwi_cache *t;
> > > > > >  
> > > > > > @@ -610,7 +606,7 @@ static void
> > > > > > mt7915_mmio_wed_release_rx_buf(struct
> > > > > > mtk_wed_device *wed)
> > > > > >  
> > > > > >  		dma_unmap_single(dev->mt76.dma_dev, t-
> > > > > > >dma_addr,
> > > > > >  				 wed->wlan.rx_size,
> > > > > > DMA_FROM_DEVICE);
> > > > > > -		__free_pages(virt_to_page(t->ptr),
> > > > > > get_order(length));
> > > > > > +		__free_page(virt_to_page(t->ptr));
> > > > > >  		t->ptr = NULL;
> > > > > >  
> > > > > >  		mt76_put_rxwi(&dev->mt76, t);
> > > > > > @@ -621,13 +617,9 @@ static u32
> > > > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > > > mtk_wed_device *wed, int size)
> > > > > >  {
> > > > > >  	struct mtk_rxbm_desc *desc = wed->rx_buf_ring.desc;
> > > > > >  	struct mt7915_dev *dev;
> > > > > > -	u32 length;
> > > > > >  	int i;
> > > > > >  
> > > > > >  	dev = container_of(wed, struct mt7915_dev,
> > > > > > mt76.mmio.wed);
> > > > > > -	length = SKB_DATA_ALIGN(NET_SKB_PAD + wed->wlan.rx_size 
> > > > > > +
> > > > > > -				sizeof(struct
> > > > > > skb_shared_info));
> > > > > > -
> > > > > >  	for (i = 0; i < size; i++) {
> > > > > >  		struct mt76_txwi_cache *t = mt76_get_rxwi(&dev-
> > > > > > >mt76);
> > > > > >  		dma_addr_t phy_addr;
> > > > > > @@ -635,7 +627,7 @@ static u32
> > > > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > > > mtk_wed_device *wed, int size)
> > > > > >  		int token;
> > > > > >  		void *ptr;
> > > > > >  
> > > > > > -		page = __dev_alloc_pages(GFP_KERNEL,
> > > > > > get_order(length));
> > > > > > +		page = __dev_alloc_page(GFP_KERNEL);
> > > > > >  		if (!page)
> > > > > >  			goto unmap;
> > > > > >  
> > > > > > @@ -644,7 +636,7 @@ static u32
> > > > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > > > mtk_wed_device *wed, int size)
> > > > > >  					  wed->wlan.rx_size,
> > > > > >  					  DMA_TO_DEVICE);
> > > > > >  		if (unlikely(dma_mapping_error(dev->mt76.dev,
> > > > > > phy_addr))) {
> > > > > > -			__free_pages(page, get_order(length));
> > > > > > +			__free_page(page);
> > > > > >  			goto unmap;
> > > > > >  		}
> > > > > >  
> > > > > > @@ -653,7 +645,7 @@ static u32
> > > > > > mt7915_mmio_wed_init_rx_buf(struct
> > > > > > mtk_wed_device *wed, int size)
> > > > > >  		if (token < 0) {
> > > > > >  			dma_unmap_single(dev->mt76.dma_dev,
> > > > > > phy_addr,
> > > > > >  					 wed->wlan.rx_size,
> > > > > > DMA_TO_DEVICE);
> > > > > > -			__free_pages(page, get_order(length));
> > > > > > +			__free_page(page);
> > > > > >  			goto unmap;
> > > > > >  		}
> > > > > >  
> 
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2023-01-06  4:05 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-12-19  4:48 [PATCH v2] wifi: mt76: fix potential memory leakage Bo Jiao
2022-12-19  9:55 ` Lorenzo Bianconi
2022-12-20  4:42   ` Sujuan Chen (陈素娟)
2022-12-21 10:26     ` lorenzo
2022-12-21 14:48       ` Sujuan Chen (陈素娟)
2022-12-21 15:12         ` Felix Fietkau
2022-12-21 18:02           ` Lorenzo Bianconi
2022-12-22  8:51             ` Sujuan Chen (陈素娟)
2022-12-22  9:44               ` lorenzo
2022-12-22 14:01                 ` Sujuan Chen (陈素娟)
2022-12-22 14:26                   ` lorenzo
2022-12-22 23:57                     ` lorenzo
2023-01-06  4:04                       ` Sujuan Chen (陈素娟)
2023-01-03  1:55             ` Sujuan Chen (陈素娟)

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).