Re: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support

netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Jesper Dangaard Brouer <jbrouer@redhat.com>
To: Shenwei Wang <shenwei.wang@nxp.com>,
	Jesper Dangaard Brouer <jbrouer@redhat.com>,
	Joakim Zhang <qiangqing.zhang@nxp.com>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>
Cc: brouer@redhat.com, Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Jesper Dangaard Brouer <hawk@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"imx@lists.linux.dev" <imx@lists.linux.dev>
Subject: Re: [EXT] Re: [PATCH 1/1] net: fec: add initial XDP support
Date: Thu, 29 Sep 2022 17:44:28 +0200	[thread overview]
Message-ID: <f1a4bd53-aa55-a040-9171-7f8a57649151@redhat.com> (raw)
In-Reply-To: <PAXPR04MB91853E39BDA9A46CB5DED06D89579@PAXPR04MB9185.eurprd04.prod.outlook.com>



On 29/09/2022 15.11, Shenwei Wang wrote:
> 
>> From: Jesper Dangaard Brouer <jbrouer@redhat.com>
 >>
>>> diff --git a/drivers/net/ethernet/freescale/fec.h
>>> b/drivers/net/ethernet/freescale/fec.h
>>> index b0100fe3c9e4..f7531503aa95 100644
>>> --- a/drivers/net/ethernet/freescale/fec.h
>>> +++ b/drivers/net/ethernet/freescale/fec.h
>>> @@ -346,8 +346,10 @@ struct bufdesc_ex {
>>>     * the skbuffer directly.
>>>     */
>>>
>>> +#define FEC_ENET_XDP_HEADROOM        (512) /* XDP_PACKET_HEADROOM
>> + NET_IP_ALIGN) */
>>
>> Why the large headroom?
>>
> 
> The accurate value here should be "XDP_PACKET_HEADROOM (256) +
> NET_IP_ALIGN" which then aligns with 64 bytes. So 256 + 64 should be
> enough here.
> 

Most other XDP drivers have 256 bytes headroom.
I don't understand why you just don't keep this at 256, like other drivers ?

>>> +
>>>    #define FEC_ENET_RX_PAGES   256
>>> -#define FEC_ENET_RX_FRSIZE   2048
>>> +#define FEC_ENET_RX_FRSIZE   (PAGE_SIZE - FEC_ENET_XDP_HEADROOM)
>>
>> This FEC_ENET_RX_FRSIZE is likely wrong, because you also need to reserve 320
>> bytes at the end for struct skb_shared_info.
>> (320 calculated as SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
>>
>>>    #define FEC_ENET_RX_FRPPG   (PAGE_SIZE / FEC_ENET_RX_FRSIZE)
>>>    #define RX_RING_SIZE                (FEC_ENET_RX_FRPPG *
>> FEC_ENET_RX_PAGES)
>>>    #define FEC_ENET_TX_FRSIZE  2048
>>> @@ -517,6 +519,22 @@ struct bufdesc_prop {
>> [...]
>>
>>> diff --git a/drivers/net/ethernet/freescale/fec_main.c
>>> b/drivers/net/ethernet/freescale/fec_main.c
>>> index 59921218a8a4..2e30182ed770 100644
>>> --- a/drivers/net/ethernet/freescale/fec_main.c
>>> +++ b/drivers/net/ethernet/freescale/fec_main.c
>>> @@ -66,6 +66,8 @@
>>>    #include <linux/mfd/syscon.h>
>>>    #include <linux/regmap.h>
>>>    #include <soc/imx/cpuidle.h>
>>> +#include <linux/filter.h>
>>> +#include <linux/bpf.h>
>>>
>>>    #include <asm/cacheflush.h>
>>>
>>> @@ -87,6 +89,11 @@ static const u16 fec_enet_vlan_pri_to_queue[8] = {0, 0,
>> 1, 1, 1, 2, 2, 2};
>>>    #define FEC_ENET_OPD_V      0xFFF0
>>>    #define FEC_MDIO_PM_TIMEOUT  100 /* ms */
>>>
>>> +#define FEC_ENET_XDP_PASS          0
>>> +#define FEC_ENET_XDP_CONSUMED      BIT(0)
>>> +#define FEC_ENET_XDP_TX            BIT(1)
>>> +#define FEC_ENET_XDP_REDIR         BIT(2)
>>> +
>>>    struct fec_devinfo {
>>>        u32 quirks;
>>>    };
>>> @@ -422,6 +429,49 @@ fec_enet_clear_csum(struct sk_buff *skb, struct
>> net_device *ndev)
>>>        return 0;
>>>    }
>>>
>>> +static int
>>> +fec_enet_create_page_pool(struct fec_enet_private *fep,
>>> +                       struct fec_enet_priv_rx_q *rxq, int size) {
>>> +     struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog);
>>> +     struct page_pool_params pp_params = {
>>> +             .order = 0,
>>> +             .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
>>> +             .pool_size = size,
>>> +             .nid = dev_to_node(&fep->pdev->dev),
>>> +             .dev = &fep->pdev->dev,
>>> +             .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE,
>>> +             .offset = FEC_ENET_XDP_HEADROOM,
>>> +             .max_len = FEC_ENET_RX_FRSIZE,
>>
>> XDP BPF-prog cannot access last 320 bytes, so FEC_ENET_RX_FRSIZE is wrong
>> here.
>>
> 
> So the FEC_ENET_RX_FRSIZE should subtract the sizeof(struct
> skb_shared_info) in the definition, right?
> 

Yes correct, but use:
   SKB_DATA_ALIGN(sizeof(struct skb_shared_info))


>>> +     };
>>> +     int err;
>>> +
>>> +     rxq->page_pool = page_pool_create(&pp_params);
>>> +     if (IS_ERR(rxq->page_pool)) {
>>> +             err = PTR_ERR(rxq->page_pool);
>>> +             rxq->page_pool = NULL;
>>> +             return err;
>>> +     }
>>> +
>>> +     err = xdp_rxq_info_reg(&rxq->xdp_rxq, fep->netdev, rxq->id, 0);
>>> +     if (err < 0)
>>> +             goto err_free_pp;
>>> +
>>> +     err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL,
>>> +                                      rxq->page_pool);
>>> +     if (err)
>>> +             goto err_unregister_rxq;
>>> +
>>> +     return 0;
>>> +
>>> +err_unregister_rxq:
>>> +     xdp_rxq_info_unreg(&rxq->xdp_rxq);
>>> +err_free_pp:
>>> +     page_pool_destroy(rxq->page_pool);
>>> +     rxq->page_pool = NULL;
>>> +     return err;
>>> +}
>>> +
>>>    static struct bufdesc *
>>>    fec_enet_txq_submit_frag_skb(struct fec_enet_priv_tx_q *txq,
>>>                             struct sk_buff *skb, @@ -1285,7 +1335,6 @@
>>> fec_stop(struct net_device *ndev)
>>>        }
>>>    }
>>>
>>> -
>>>    static void
>>>    fec_timeout(struct net_device *ndev, unsigned int txqueue)
>>>    {
>>> @@ -1450,7 +1499,7 @@ static void fec_enet_tx(struct net_device *ndev)
>>>                fec_enet_tx_queue(ndev, i);
>>>    }
>>>
>>> -static int
>>> +static int __maybe_unused
>>>    fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc *bdp, struct sk_buff *skb)
>>>    {
>>>        struct  fec_enet_private *fep = netdev_priv(ndev); @@ -1470,8
>>> +1519,9 @@ fec_enet_new_rxbdp(struct net_device *ndev, struct bufdesc
>> *bdp, struct sk_buff
>>>        return 0;
>>>    }
>>>
>>> -static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
>>> -                            struct bufdesc *bdp, u32 length, bool swap)
>>> +static bool __maybe_unused
>>> +fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
>>> +                struct bufdesc *bdp, u32 length, bool swap)
>>>    {
>>>        struct  fec_enet_private *fep = netdev_priv(ndev);
>>>        struct sk_buff *new_skb;
>>> @@ -1496,6 +1546,78 @@ static bool fec_enet_copybreak(struct net_device *ndev, struct sk_buff **skb,
>>>        return true;
>>>    }
>>>
>>> +static void fec_enet_update_cbd(struct fec_enet_priv_rx_q *rxq,
>>> +                             struct bufdesc *bdp, int index) {
>>> +     struct page *new_page;
>>> +     dma_addr_t phys_addr;
>>> +
>>> +     new_page = page_pool_dev_alloc_pages(rxq->page_pool);
>>> +     WARN_ON(!new_page);
>>> +     rxq->rx_skb_info[index].page = new_page;
>>> +
>>> +     rxq->rx_skb_info[index].offset = FEC_ENET_XDP_HEADROOM;
>>> +     phys_addr = page_pool_get_dma_addr(new_page) + FEC_ENET_XDP_HEADROOM;
>>> +     bdp->cbd_bufaddr = cpu_to_fec32(phys_addr); }
>>> +
>>> +static u32
>>> +fec_enet_run_xdp(struct fec_enet_private *fep, struct bpf_prog *prog,
>>> +              struct xdp_buff *xdp, struct fec_enet_priv_rx_q *rxq,
>>> +int index) {
>>> +     unsigned int sync, len = xdp->data_end - xdp->data;
>>> +     u32 ret = FEC_ENET_XDP_PASS;
>>> +     struct page *page;
>>> +     int err;
>>> +     u32 act;
>>> +
>>> +     act = bpf_prog_run_xdp(prog, xdp);
>>> +
>>> +     /* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
>>> +     sync = xdp->data_end - xdp->data_hard_start - FEC_ENET_XDP_HEADROOM;
>>> +     sync = max(sync, len);
>>> +
>>> +     switch (act) {
>>> +     case XDP_PASS:
>>> +             rxq->stats.xdp_pass++;
>>> +             ret = FEC_ENET_XDP_PASS;
>>> +             break;
>>> +
>>> +     case XDP_TX:
>>> +             rxq->stats.xdp_tx++;
>>> +             bpf_warn_invalid_xdp_action(fep->netdev, prog, act);
>>> +             fallthrough;
>>
>> This fallthrough looks wrong. The next xdp_do_redirect() call will pickup left-
>> overs in per CPU bpf_redirect_info.
>>
> 
> So before the XDP_TX is implemented, this part of codes should reside below the XDP_REDIRECT case?
> 

If that fallthrough goes to dropping packet, then yes.

>>> +
>>> +     case XDP_REDIRECT:
>>> +             err = xdp_do_redirect(fep->netdev, xdp, prog);
>>> +             rxq->stats.xdp_redirect++;
>>> -                     dma_unmap_single(&fep->pdev->dev,
> ...
> 
>>> -                                      fec32_to_cpu(bdp->cbd_bufaddr),
>>> -                                      FEC_ENET_RX_FRSIZE - fep->rx_align,
>>> -                                      DMA_FROM_DEVICE);
>>> -             }
>>> -
>>> -             prefetch(skb->data - NET_IP_ALIGN);
>>> +             skb = build_skb(page_address(page), FEC_ENET_RX_FRSIZE);
>>
>> This looks wrong, I think FEC_ENET_RX_FRSIZE should be replaced by PAGE_SIZE.
>> As build_skb() does:
>>
>>    size -= SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
>>
> 
> Agree. As the current FRSIZE definition did not subtract the sizeof(struct skb_shared_info), I happened to not see the problem during the testing.
> 

As I wrote use PAGE_SIZE here.


>>> +             skb_reserve(skb, FEC_ENET_XDP_HEADROOM);
>>
>> The skb_reserve looks correct.
>>
>>>                skb_put(skb, pkt_len - 4);
>>>                data = skb->data;
>>> +             page_pool_release_page(rxq->page_pool, page);
>>
>> Today page_pool have SKB recycle support (you might have looked at drivers
>> that didn't utilize this yet), thus you don't need to release the page
>> (page_pool_release_page) here.  Instead you could simply mark the SKB for
>> recycling, unless driver does some page refcnt tricks I didn't notice.
>>
>>    skb_mark_for_recycle(skb);

I hope you try out the above proposed change.

>>
>>> -             if (!is_copybreak && need_swap)
>>> +             if (need_swap)
>>>                        swap_buffer(data, pkt_len);
>>>
>>>    #if !defined(CONFIG_M5272)
>>> @@ -1649,16 +1781,6 @@ fec_enet_rx_queue(struct net_device *ndev, int
>>> budget, u16 queue_id)
>> [...]
>

next prev parent reply	other threads:[~2022-09-29 15:46 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-09-28 15:25 [PATCH 1/1] net: fec: add initial XDP support Shenwei Wang
2022-09-28 21:18 ` kernel test robot
2022-09-29  1:33 ` Andrew Lunn
2022-09-29 12:40   ` [EXT] " Shenwei Wang
2022-09-29 13:22     ` Andrew Lunn
2022-09-29 13:26       ` Shenwei Wang
2022-09-29 15:19         ` Andrew Lunn
2022-09-29 15:28         ` Jesper Dangaard Brouer
2022-09-29 15:39           ` Andrew Lunn
2022-09-29 15:52           ` Shenwei Wang
2022-09-29 18:55             ` Jesper Dangaard Brouer
2022-10-03 12:49               ` Shenwei Wang
2022-10-04 11:21                 ` Jesper Dangaard Brouer
2022-10-04 13:12                   ` Shenwei Wang
2022-10-04 13:34                     ` Shenwei Wang
2022-10-05 12:40                       ` Shenwei Wang
2022-10-06  8:37                         ` Jesper Dangaard Brouer
2022-10-07  8:08                           ` Ilias Apalodimas
2022-10-07 19:18                             ` Shenwei Wang
2022-09-29  1:50 ` Andrew Lunn
2022-09-29 12:46   ` [EXT] " Shenwei Wang
2022-09-29 13:24     ` Andrew Lunn
2022-09-29 13:35       ` Shenwei Wang
2022-09-29  2:43 ` kernel test robot
2022-09-29 10:16 ` Jesper Dangaard Brouer
2022-09-29 13:11   ` [EXT] " Shenwei Wang
2022-09-29 15:44     ` Jesper Dangaard Brouer [this message]
2022-10-03  5:41 ` kernel test robot
  -- strict thread matches above, loose matches on Subject: below --
2022-10-25 20:11 Shenwei Wang
2022-10-25 22:08 ` Andrew Lunn
2022-10-27  1:50   ` [EXT] " Shenwei Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f1a4bd53-aa55-a040-9171-7f8a57649151@redhat.com \
    --to=jbrouer@redhat.com \
    --cc=ast@kernel.org \
    --cc=brouer@redhat.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hawk@kernel.org \
    --cc=imx@lists.linux.dev \
    --cc=john.fastabend@gmail.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=qiangqing.zhang@nxp.com \
    --cc=shenwei.wang@nxp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).