From: Byungchul Park <byungchul@sk.com>
To: Pavel Begunkov <asml.silence@gmail.com>
Cc: willy@infradead.org, netdev@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-mm@kvack.org,
	kernel_team@skhynix.com, kuba@kernel.org, almasrymina@google.com,
	ilias.apalodimas@linaro.org, harry.yoo@oracle.com,
	hawk@kernel.org, akpm@linux-foundation.org, davem@davemloft.net,
	john.fastabend@gmail.com, andrew+netdev@lunn.ch, toke@redhat.com,
	tariqt@nvidia.com, edumazet@google.com, pabeni@redhat.com,
	saeedm@nvidia.com, leon@kernel.org, ast@kernel.org,
	daniel@iogearbox.net, david@redhat.com,
	lorenzo.stoakes@oracle.com, Liam.Howlett@oracle.com,
	vbabka@suse.cz, rppt@kernel.org, surenb@google.com,
	mhocko@suse.com, horms@kernel.org, linux-rdma@vger.kernel.org,
	bpf@vger.kernel.org, vishal.moola@gmail.com, hannes@cmpxchg.org,
	ziy@nvidia.com, jackmanb@google.com
Subject: Re: [PATCH net-next v9 1/8] netmem: introduce struct netmem_desc mirroring struct page
Date: Mon, 14 Jul 2025 20:58:57 +0900	[thread overview]
Message-ID: <20250714115857.GA31302@system.software.com> (raw)
In-Reply-To: <a7bd1e6f-b854-4172-a29a-3f0662c6fd6e@gmail.com>

On Mon, Jul 14, 2025 at 12:30:12PM +0100, Pavel Begunkov wrote:
> On 7/14/25 05:23, Byungchul Park wrote:
> > On Sat, Jul 12, 2025 at 03:39:59PM +0100, Pavel Begunkov wrote:
> > > On 7/10/25 09:28, Byungchul Park wrote:
> > > > To simplify struct page, the page pool members of struct page should be
> > > > moved to a separate structure, allowing these members to be removed from
> > > > struct page.
> > > > 
> > > > Introduce a network memory descriptor, struct netmem_desc, to store these
> > > > members, and union it with the existing fields in struct net_iov, allowing
> > > > the fields of struct net_iov to be organized.
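[A minimal sketch, not taken from the patch itself, of the layout the commit
message describes: struct netmem_desc carries the page-pool fields, and
struct net_iov overlays them through a union. The field set follows the
helpers touched in the diff below (pp, pp_magic, pp_ref_count, dma_addr);
everything else here is illustrative only.]

	struct netmem_desc {
		unsigned long		pp_magic;	/* page-pool magic + DMA index bits */
		struct page_pool	*pp;		/* owning page pool */
		unsigned long		dma_addr;	/* mapped DMA address */
		atomic_long_t		pp_ref_count;	/* page-pool reference count */
	};

	struct net_iov {
		union {
			struct netmem_desc desc;	/* new, preferred accessor */
			struct {			/* legacy field names kept for existing users */
				unsigned long		pp_magic;
				struct page_pool	*pp;
				unsigned long		dma_addr;
				atomic_long_t		pp_ref_count;
			};
		};
		/* net_iov-specific members follow ... */
	};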
> > > 
> > > FWIW, regardless of memdesc business, I think it'd be great to have
> > > this patch, as it'll help with some of the netmem casting ugliness and
> > > shed some cycles as well. For example, we have a bunch of
> > > niov -> netmem -> niov casts in various places.
> > 
> > If Jakub agrees with this, I will re-post this as a separate patch so
> > that work that requires this base can go ahead.
> 
> I think it'd be a good idea. It's needed to clean up netmem handling,
> and I'll convert io_uring and get rid of the union in niov.
> 
> The diff below should give a rough idea of what I want to use it for.
> It kills __netmem_clear_lsb() to avoid casting struct page * to niov,
> and it saves some masking for zcrx; see page_pool_get_dma_addr_nmdesc(),
> and there are more places like that.
> 
> 
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index 535cf17b9134..41f3a3fd6b6c 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -247,6 +247,8 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
>        return page_to_pfn(netmem_to_page(netmem));
>  }
> 
> +#define pp_page_to_nmdesc(page)        ((struct netmem_desc *)(page))
> +
>  /* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
>   * common fields.
>   * @netmem: netmem reference to extract as net_iov.
> @@ -262,11 +264,18 @@ static inline unsigned long netmem_pfn_trace(netmem_ref netmem)
>   *
>   * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
>   */
> -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> +static inline struct net_iov *__netmem_to_niov(netmem_ref netmem)
>  {
>        return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
>  }
> 
> +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)

I removed netmem_to_nmdesc() and its users while working on v10.
However, you can add it back if needed for your cleanup.

I think I should share v10 now so you can see the decisions I made.

	Byungchul

> +{
> +       if (netmem_is_net_iov(netmem))
> +               return &__netmem_to_niov(netmem)->desc;
> +       return pp_page_to_nmdesc(__netmem_to_page(netmem));
> +}
> +
>  /**
>   * __netmem_get_pp - unsafely get pointer to the &page_pool backing @netmem
>   * @netmem: netmem reference to get the pointer from
> @@ -280,17 +289,17 @@ static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
>   */
>  static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
>  {
> -       return __netmem_to_page(netmem)->pp;
> +       return pp_page_to_nmdesc(__netmem_to_page(netmem))->pp;
>  }
> 
>  static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
>  {
> -       return __netmem_clear_lsb(netmem)->pp;
> +       return netmem_to_nmdesc(netmem)->pp;
>  }
> 
>  static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
>  {
> -       return &__netmem_clear_lsb(netmem)->pp_ref_count;
> +       return &netmem_to_nmdesc(netmem)->pp_ref_count;
>  }
> 
>  static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
> @@ -355,7 +364,7 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
> 
>  static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
>  {
> -       return __netmem_clear_lsb(netmem)->dma_addr;
> +       return netmem_to_nmdesc(netmem)->dma_addr;
>  }
> 
>  void get_netmem(netmem_ref netmem);
> diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h
> index db180626be06..002858f3bcb3 100644
> --- a/include/net/page_pool/helpers.h
> +++ b/include/net/page_pool/helpers.h
> @@ -425,9 +425,9 @@ static inline void page_pool_free_va(struct page_pool *pool, void *va,
>        page_pool_put_page(pool, virt_to_head_page(va), -1, allow_direct);
>  }
> 
> -static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
> +static inline dma_addr_t page_pool_get_dma_addr_nmdesc(struct netmem_desc *desc)
>  {
> -       dma_addr_t ret = netmem_get_dma_addr(netmem);
> +       dma_addr_t ret = desc->dma_addr;
> 
>        if (PAGE_POOL_32BIT_ARCH_WITH_64BIT_DMA)
>                ret <<= PAGE_SHIFT;
> @@ -435,6 +435,13 @@ static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
>        return ret;
>  }
> 
> +static inline dma_addr_t page_pool_get_dma_addr_netmem(netmem_ref netmem)
> +{
> +       struct netmem_desc *desc = netmem_to_nmdesc(netmem);
> +
> +       return page_pool_get_dma_addr_nmdesc(desc);
> +}
> +
>  /**
>   * page_pool_get_dma_addr() - Retrieve the stored DMA address.
>   * @page:     page allocated from a page pool
> diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
> index 085eeed8cd50..2e80692d9ee1 100644
> --- a/io_uring/zcrx.c
> +++ b/io_uring/zcrx.c
> @@ -290,7 +290,7 @@ static void io_zcrx_sync_for_device(const struct page_pool *pool,
>        if (!dma_dev_need_sync(pool->p.dev))
>                return;
> 
> -       dma_addr = page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
> +       dma_addr = page_pool_get_dma_addr_nmdesc(&niov->desc);
>        __dma_sync_single_for_device(pool->p.dev, dma_addr + pool->p.offset,
>                                     PAGE_SIZE, pool->p.dma_dir);
>  #endif
> diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
> index cd95394399b4..97d4beda9174 100644
> --- a/net/core/netmem_priv.h
> +++ b/net/core/netmem_priv.h
> @@ -5,19 +5,21 @@
> 
>  static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
>  {
> -       return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
> +       return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
>  }
> 
>  static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
>  {
> -       __netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
> +       netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
>  }
> 
>  static inline void netmem_clear_pp_magic(netmem_ref netmem)
>  {
> -       WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
> +       struct netmem_desc *desc = netmem_to_nmdesc(netmem);
> 
> -       __netmem_clear_lsb(netmem)->pp_magic = 0;
> +       WARN_ON_ONCE(desc->pp_magic & PP_DMA_INDEX_MASK);
> +
> +       desc->pp_magic = 0;
>  }
> 
>  static inline bool netmem_is_pp(netmem_ref netmem)
> @@ -27,13 +29,13 @@ static inline bool netmem_is_pp(netmem_ref netmem)
> 
>  static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
>  {
> -       __netmem_clear_lsb(netmem)->pp = pool;
> +       netmem_to_nmdesc(netmem)->pp = pool;
>  }
> 
>  static inline void netmem_set_dma_addr(netmem_ref netmem,
>                                       unsigned long dma_addr)
>  {
> -       __netmem_clear_lsb(netmem)->dma_addr = dma_addr;
> +       netmem_to_nmdesc(netmem)->dma_addr = dma_addr;
>  }
> 
>  static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
> @@ -43,7 +45,7 @@ static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
>        if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
>                return 0;
> 
> -       magic = __netmem_clear_lsb(netmem)->pp_magic;
> +       magic = netmem_to_nmdesc(netmem)->pp_magic;
> 
>        return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
>  }
> @@ -51,12 +53,12 @@ static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
>  static inline void netmem_set_dma_index(netmem_ref netmem,
>                                        unsigned long id)
>  {
> -       unsigned long magic;
> +       struct netmem_desc *desc;
> 
>        if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
>                return;
> 
> -       magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
> -       __netmem_clear_lsb(netmem)->pp_magic = magic;
> +       desc = netmem_to_nmdesc(netmem);
> +       desc->pp_magic |= id << PP_DMA_INDEX_SHIFT;
>  }
>  #endif
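[A rough usage sketch, not part of the diff above, of the zcrx saving Pavel
mentions: the old helper tags the net_iov pointer into a netmem_ref and then
masks the tag back off, while the new one takes the embedded descriptor
directly. Helper and field names are the ones used in the diff; the two
wrapper functions themselves are hypothetical.]

	/* old path: niov -> netmem (sets the NET_IOV bit) -> mask it off -> desc */
	static dma_addr_t zcrx_dma_addr_old(struct net_iov *niov)
	{
		return page_pool_get_dma_addr_netmem(net_iov_to_netmem(niov));
	}

	/* new path: straight to the embedded netmem_desc, no tag/untag round trip */
	static dma_addr_t zcrx_dma_addr_new(struct net_iov *niov)
	{
		return page_pool_get_dma_addr_nmdesc(&niov->desc);
	}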


Thread overview: 49+ messages
2025-07-10  8:27 [PATCH net-next v9 0/8] Split netmem from struct page Byungchul Park
2025-07-10  8:28 ` [PATCH net-next v9 1/8] netmem: introduce struct netmem_desc mirroring " Byungchul Park
2025-07-12 14:39   ` Pavel Begunkov
2025-07-14  4:23     ` Byungchul Park
2025-07-14 11:30       ` Pavel Begunkov
2025-07-14 11:58         ` Byungchul Park [this message]
2025-07-14 19:17         ` Mina Almasry
2025-07-15 10:01           ` Pavel Begunkov
2025-07-10  8:28 ` [PATCH net-next v9 2/8] netmem: introduce utility APIs to use struct netmem_desc Byungchul Park
2025-07-10 18:11   ` Mina Almasry
2025-07-11  1:02     ` Byungchul Park
2025-07-12 12:16       ` Pavel Begunkov
2025-07-12 12:05     ` Pavel Begunkov
2025-07-12 11:59   ` Pavel Begunkov
2025-07-13 23:07     ` Byungchul Park
2025-07-13 23:39       ` Byungchul Park
2025-07-14  9:43       ` Pavel Begunkov
2025-07-14 10:05         ` Byungchul Park
2025-07-14 11:45           ` Pavel Begunkov
2025-07-14 12:06             ` Byungchul Park
2025-07-10  8:28 ` [PATCH net-next v9 3/8] page_pool: access ->pp_magic through struct netmem_desc in page_pool_page_is_pp() Byungchul Park
2025-07-10 18:19   ` Mina Almasry
2025-07-11  1:14     ` Byungchul Park
2025-07-12 13:58       ` Pavel Begunkov
2025-07-12 14:52         ` David Hildenbrand
2025-07-12 15:09           ` Pavel Begunkov
2025-07-13 23:22             ` Byungchul Park
2025-07-17  3:08         ` Byungchul Park
2025-07-22  1:23           ` Byungchul Park
2025-07-28 18:19             ` Pavel Begunkov
2025-07-14 19:09       ` Mina Almasry
2025-07-15  9:53         ` Pavel Begunkov
2025-07-10  8:28 ` [PATCH net-next v9 4/8] netmem: use netmem_desc instead of page to access ->pp in __netmem_get_pp() Byungchul Park
2025-07-10 18:25   ` Mina Almasry
2025-07-11  1:17     ` Byungchul Park
2025-07-10  8:28 ` [PATCH net-next v9 5/8] netmem: introduce a netmem API, virt_to_head_netmem() Byungchul Park
2025-07-10 18:26   ` Mina Almasry
2025-07-10  8:28 ` [PATCH net-next v9 6/8] mlx4: use netmem descriptor and APIs for page pool Byungchul Park
2025-07-10 18:29   ` Mina Almasry
2025-07-11  1:32     ` Byungchul Park
2025-07-14 19:02       ` Mina Almasry
2025-07-10  8:28 ` [PATCH net-next v9 7/8] netdevsim: " Byungchul Park
2025-07-10 18:26   ` Mina Almasry
2025-07-10  8:28 ` [PATCH net-next v9 8/8] mt76: " Byungchul Park
2025-07-12 14:22   ` Pavel Begunkov
2025-07-14  2:13     ` Byungchul Park
2025-07-10  8:47 ` [PATCH net-next v9 0/8] Split netmem from struct page Byungchul Park
2025-07-10 18:35 ` Mina Almasry
2025-07-11  0:42   ` Byungchul Park
