netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
@ 2025-07-28  4:20 Byungchul Park
  2025-07-28  5:35 ` Byungchul Park
  2025-07-28 17:44 ` Mina Almasry
  0 siblings, 2 replies; 8+ messages in thread
From: Byungchul Park @ 2025-07-28  4:20 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: davem, edumazet, kuba, pabeni, horms, almasrymina, hawk, toke,
	asml.silence

Now that we have struct netmem_desc, it'd better access the pp fields
via struct netmem_desc rather than struct net_iov.

Introduce netmem_to_nmdesc() for safely converting netmem_ref to
netmem_desc regardless of the type underneath e.i. netmem_desc, net_iov.

While at it, remove __netmem_clear_lsb() and make netmem_to_nmdesc()
used instead.

Signed-off-by: Byungchul Park <byungchul@sk.com>
---
 include/net/netmem.h   | 33 ++++++++++++++++-----------------
 net/core/netmem_priv.h | 16 ++++++++--------
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/net/netmem.h b/include/net/netmem.h
index f7dacc9e75fd..33ae444a9745 100644
--- a/include/net/netmem.h
+++ b/include/net/netmem.h
@@ -265,24 +265,23 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
 	return (__force struct netmem_desc *)netmem;
 }
 
-/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
- * common fields.
- * @netmem: netmem reference to extract as net_iov.
+/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for
+ * access to common fields.
+ * @netmem: netmem reference to get netmem_desc.
  *
- * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
- * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
- * these fields without a type check to make sure that the underlying mem is
- * net_iov or page.
+ * All the sub types of netmem_ref (netmem_desc, net_iov) have the same
+ * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc.
  *
- * The resulting value of this function can only be used to access the fields
- * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
- * undefined behavior.
- *
- * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
+ * Return: the pointer to struct netmem_desc * regardless of its
+ * underlying type.
  */
-static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
+static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
 {
-	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
+	if (netmem_is_net_iov(netmem))
+		return &((struct net_iov *)((__force unsigned long)netmem &
+					    ~NET_IOV))->desc;
+
+	return __netmem_to_nmdesc(netmem);
 }
 
 /* XXX: How to extract netmem_desc from page must be changed, once
@@ -320,12 +319,12 @@ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
 
 static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
 {
-	return __netmem_clear_lsb(netmem)->pp;
+	return netmem_to_nmdesc(netmem)->pp;
 }
 
 static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
 {
-	return &__netmem_clear_lsb(netmem)->pp_ref_count;
+	return &netmem_to_nmdesc(netmem)->pp_ref_count;
 }
 
 static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
@@ -390,7 +389,7 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
 
 static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
 {
-	return __netmem_clear_lsb(netmem)->dma_addr;
+	return netmem_to_nmdesc(netmem)->dma_addr;
 }
 
 void get_netmem(netmem_ref netmem);
diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
index cd95394399b4..23175cb2bd86 100644
--- a/net/core/netmem_priv.h
+++ b/net/core/netmem_priv.h
@@ -5,19 +5,19 @@
 
 static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
 {
-	return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
+	return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
 }
 
 static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
 {
-	__netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
+	netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
 }
 
 static inline void netmem_clear_pp_magic(netmem_ref netmem)
 {
-	WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
+	WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK);
 
-	__netmem_clear_lsb(netmem)->pp_magic = 0;
+	netmem_to_nmdesc(netmem)->pp_magic = 0;
 }
 
 static inline bool netmem_is_pp(netmem_ref netmem)
@@ -27,13 +27,13 @@ static inline bool netmem_is_pp(netmem_ref netmem)
 
 static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
 {
-	__netmem_clear_lsb(netmem)->pp = pool;
+	netmem_to_nmdesc(netmem)->pp = pool;
 }
 
 static inline void netmem_set_dma_addr(netmem_ref netmem,
 				       unsigned long dma_addr)
 {
-	__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
+	netmem_to_nmdesc(netmem)->dma_addr = dma_addr;
 }
 
 static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
@@ -43,7 +43,7 @@ static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
 	if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
 		return 0;
 
-	magic = __netmem_clear_lsb(netmem)->pp_magic;
+	magic = netmem_to_nmdesc(netmem)->pp_magic;
 
 	return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
 }
@@ -57,6 +57,6 @@ static inline void netmem_set_dma_index(netmem_ref netmem,
 		return;
 
 	magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
-	__netmem_clear_lsb(netmem)->pp_magic = magic;
+	netmem_to_nmdesc(netmem)->pp_magic = magic;
 }
 #endif

base-commit: fa582ca7e187a15e772e6a72fe035f649b387a60
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28  4:20 [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc() Byungchul Park
@ 2025-07-28  5:35 ` Byungchul Park
  2025-07-28 17:44 ` Mina Almasry
  1 sibling, 0 replies; 8+ messages in thread
From: Byungchul Park @ 2025-07-28  5:35 UTC (permalink / raw)
  To: netdev, linux-kernel
  Cc: kernel_team, davem, edumazet, kuba, pabeni, horms, almasrymina,
	hawk, toke, asml.silence

> Now that we have struct netmem_desc, it'd better access the pp fields
> via struct netmem_desc rather than struct net_iov.
> 
> Introduce netmem_to_nmdesc() for safely converting netmem_ref to
> netmem_desc regardless of the type underneath e.i. netmem_desc, net_iov.
> 
> While at it, remove __netmem_clear_lsb() and make netmem_to_nmdesc()
> used instead.

+cc kernel_team@skhynix.com

	Byungchul

> Signed-off-by: Byungchul Park <byungchul@sk.com>
> ---
>  include/net/netmem.h   | 33 ++++++++++++++++-----------------
>  net/core/netmem_priv.h | 16 ++++++++--------
>  2 files changed, 24 insertions(+), 25 deletions(-)
> 
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index f7dacc9e75fd..33ae444a9745 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -265,24 +265,23 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
>  	return (__force struct netmem_desc *)netmem;
>  }
>  
> -/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
> - * common fields.
> - * @netmem: netmem reference to extract as net_iov.
> +/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for
> + * access to common fields.
> + * @netmem: netmem reference to get netmem_desc.
>   *
> - * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
> - * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
> - * these fields without a type check to make sure that the underlying mem is
> - * net_iov or page.
> + * All the sub types of netmem_ref (netmem_desc, net_iov) have the same
> + * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc.
>   *
> - * The resulting value of this function can only be used to access the fields
> - * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
> - * undefined behavior.
> - *
> - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
> + * Return: the pointer to struct netmem_desc * regardless of its
> + * underlying type.
>   */
> -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
>  {
> -	return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
> +	if (netmem_is_net_iov(netmem))
> +		return &((struct net_iov *)((__force unsigned long)netmem &
> +					    ~NET_IOV))->desc;
> +
> +	return __netmem_to_nmdesc(netmem);
>  }
>  
>  /* XXX: How to extract netmem_desc from page must be changed, once
> @@ -320,12 +319,12 @@ static inline struct page_pool *__netmem_get_pp(netmem_ref netmem)
>  
>  static inline struct page_pool *netmem_get_pp(netmem_ref netmem)
>  {
> -	return __netmem_clear_lsb(netmem)->pp;
> +	return netmem_to_nmdesc(netmem)->pp;
>  }
>  
>  static inline atomic_long_t *netmem_get_pp_ref_count_ref(netmem_ref netmem)
>  {
> -	return &__netmem_clear_lsb(netmem)->pp_ref_count;
> +	return &netmem_to_nmdesc(netmem)->pp_ref_count;
>  }
>  
>  static inline bool netmem_is_pref_nid(netmem_ref netmem, int pref_nid)
> @@ -390,7 +389,7 @@ static inline bool netmem_is_pfmemalloc(netmem_ref netmem)
>  
>  static inline unsigned long netmem_get_dma_addr(netmem_ref netmem)
>  {
> -	return __netmem_clear_lsb(netmem)->dma_addr;
> +	return netmem_to_nmdesc(netmem)->dma_addr;
>  }
>  
>  void get_netmem(netmem_ref netmem);
> diff --git a/net/core/netmem_priv.h b/net/core/netmem_priv.h
> index cd95394399b4..23175cb2bd86 100644
> --- a/net/core/netmem_priv.h
> +++ b/net/core/netmem_priv.h
> @@ -5,19 +5,19 @@
>  
>  static inline unsigned long netmem_get_pp_magic(netmem_ref netmem)
>  {
> -	return __netmem_clear_lsb(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
> +	return netmem_to_nmdesc(netmem)->pp_magic & ~PP_DMA_INDEX_MASK;
>  }
>  
>  static inline void netmem_or_pp_magic(netmem_ref netmem, unsigned long pp_magic)
>  {
> -	__netmem_clear_lsb(netmem)->pp_magic |= pp_magic;
> +	netmem_to_nmdesc(netmem)->pp_magic |= pp_magic;
>  }
>  
>  static inline void netmem_clear_pp_magic(netmem_ref netmem)
>  {
> -	WARN_ON_ONCE(__netmem_clear_lsb(netmem)->pp_magic & PP_DMA_INDEX_MASK);
> +	WARN_ON_ONCE(netmem_to_nmdesc(netmem)->pp_magic & PP_DMA_INDEX_MASK);
>  
> -	__netmem_clear_lsb(netmem)->pp_magic = 0;
> +	netmem_to_nmdesc(netmem)->pp_magic = 0;
>  }
>  
>  static inline bool netmem_is_pp(netmem_ref netmem)
> @@ -27,13 +27,13 @@ static inline bool netmem_is_pp(netmem_ref netmem)
>  
>  static inline void netmem_set_pp(netmem_ref netmem, struct page_pool *pool)
>  {
> -	__netmem_clear_lsb(netmem)->pp = pool;
> +	netmem_to_nmdesc(netmem)->pp = pool;
>  }
>  
>  static inline void netmem_set_dma_addr(netmem_ref netmem,
>  				       unsigned long dma_addr)
>  {
> -	__netmem_clear_lsb(netmem)->dma_addr = dma_addr;
> +	netmem_to_nmdesc(netmem)->dma_addr = dma_addr;
>  }
>  
>  static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
> @@ -43,7 +43,7 @@ static inline unsigned long netmem_get_dma_index(netmem_ref netmem)
>  	if (WARN_ON_ONCE(netmem_is_net_iov(netmem)))
>  		return 0;
>  
> -	magic = __netmem_clear_lsb(netmem)->pp_magic;
> +	magic = netmem_to_nmdesc(netmem)->pp_magic;
>  
>  	return (magic & PP_DMA_INDEX_MASK) >> PP_DMA_INDEX_SHIFT;
>  }
> @@ -57,6 +57,6 @@ static inline void netmem_set_dma_index(netmem_ref netmem,
>  		return;
>  
>  	magic = netmem_get_pp_magic(netmem) | (id << PP_DMA_INDEX_SHIFT);
> -	__netmem_clear_lsb(netmem)->pp_magic = magic;
> +	netmem_to_nmdesc(netmem)->pp_magic = magic;
>  }
>  #endif
> 
> base-commit: fa582ca7e187a15e772e6a72fe035f649b387a60
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28  4:20 [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc() Byungchul Park
  2025-07-28  5:35 ` Byungchul Park
@ 2025-07-28 17:44 ` Mina Almasry
  2025-07-28 18:46   ` Pavel Begunkov
  2025-07-29  1:10   ` Byungchul Park
  1 sibling, 2 replies; 8+ messages in thread
From: Mina Almasry @ 2025-07-28 17:44 UTC (permalink / raw)
  To: Byungchul Park
  Cc: netdev, linux-kernel, davem, edumazet, kuba, pabeni, horms, hawk,
	toke, asml.silence

On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
>
> Now that we have struct netmem_desc, it'd better access the pp fields
> via struct netmem_desc rather than struct net_iov.
>
> Introduce netmem_to_nmdesc() for safely converting netmem_ref to
> netmem_desc regardless of the type underneath e.i. netmem_desc, net_iov.
>
> While at it, remove __netmem_clear_lsb() and make netmem_to_nmdesc()
> used instead.
>
> Signed-off-by: Byungchul Park <byungchul@sk.com>

Thank you for working on paying this tech debt!

> ---
>  include/net/netmem.h   | 33 ++++++++++++++++-----------------
>  net/core/netmem_priv.h | 16 ++++++++--------
>  2 files changed, 24 insertions(+), 25 deletions(-)
>
> diff --git a/include/net/netmem.h b/include/net/netmem.h
> index f7dacc9e75fd..33ae444a9745 100644
> --- a/include/net/netmem.h
> +++ b/include/net/netmem.h
> @@ -265,24 +265,23 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
>         return (__force struct netmem_desc *)netmem;
>  }
>
> -/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
> - * common fields.
> - * @netmem: netmem reference to extract as net_iov.
> +/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for
> + * access to common fields.
> + * @netmem: netmem reference to get netmem_desc.
>   *
> - * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
> - * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
> - * these fields without a type check to make sure that the underlying mem is
> - * net_iov or page.
> + * All the sub types of netmem_ref (netmem_desc, net_iov) have the same
> + * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc.
>   *
> - * The resulting value of this function can only be used to access the fields
> - * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
> - * undefined behavior.
> - *

I think instead of removing this warning, we want to add an
NET_IOV_ASSERT_OFFSET that asserts that net_iov->netmem_desc and
page->netmem_desc are in the same offset, and then add a note here
that this works because we assert that the netmem_desc offset in both
net_iov and page are the same.

> - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
> + * Return: the pointer to struct netmem_desc * regardless of its
> + * underlying type.
>   */
> -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
>  {
> -       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
> +       if (netmem_is_net_iov(netmem))
> +               return &((struct net_iov *)((__force unsigned long)netmem &
> +                                           ~NET_IOV))->desc;
> +
> +       return __netmem_to_nmdesc(netmem);

The if statement generates overhead. I'd rather avoid it. We can
implement netmem_to_nmdesc like this, no?

netmem_to_nmdesc(netmem_ref netmem)
{
  return (struct netmem_desc)((__force unsigned long)netmem & ~NET_IOV);
}

Because netmem_desc is the first element in both net_iov and page for
the moment. (yes I know that will change eventually, but we don't have
to incur overhead of an extra if statement until netmem_desc is
removed from page, right?)


-- 
Thanks,
Mina

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28 17:44 ` Mina Almasry
@ 2025-07-28 18:46   ` Pavel Begunkov
  2025-07-28 18:58     ` Pavel Begunkov
  2025-07-29  1:10   ` Byungchul Park
  1 sibling, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2025-07-28 18:46 UTC (permalink / raw)
  To: Mina Almasry, Byungchul Park
  Cc: netdev, linux-kernel, davem, edumazet, kuba, pabeni, horms, hawk,
	toke

On 7/28/25 18:44, Mina Almasry wrote:
> On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
>>

...>> - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
>> + * Return: the pointer to struct netmem_desc * regardless of its
>> + * underlying type.
>>    */
>> -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
>> +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
>>   {
>> -       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
>> +       if (netmem_is_net_iov(netmem))
>> +               return &((struct net_iov *)((__force unsigned long)netmem &
>> +                                           ~NET_IOV))->desc;
>> +
>> +       return __netmem_to_nmdesc(netmem);
> 
> The if statement generates overhead. I'd rather avoid it. We can
> implement netmem_to_nmdesc like this, no?
> 
> netmem_to_nmdesc(netmem_ref netmem)
> {
>    return (struct netmem_desc)((__force unsigned long)netmem & ~NET_IOV);
> }
> 
> Because netmem_desc is the first element in both net_iov and page for
> the moment. (yes I know that will change eventually, but we don't have
> to incur overhead of an extra if statement until netmem_desc is
> removed from page, right?)

Same concern, but I think the goal here should be to make enough
info to the compiler to optimise it out without assumptions on
the layouts nor NET_IOV_ASSERT_OFFSET. Currently it's not so bad,
but we should be able to remove this test+cmove.

	movq	%rdi, %rax	# netmem, tmp105
	andq	$-2, %rax	#, tmp105
	testb	$1, %dil	#, netmem
	cmove	%rdi, %rax	# tmp105,, netmem, <retval>
	jmp	__x86_return_thunk


-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28 18:46   ` Pavel Begunkov
@ 2025-07-28 18:58     ` Pavel Begunkov
  2025-07-29  1:17       ` Byungchul Park
  0 siblings, 1 reply; 8+ messages in thread
From: Pavel Begunkov @ 2025-07-28 18:58 UTC (permalink / raw)
  To: Mina Almasry, Byungchul Park
  Cc: netdev, linux-kernel, davem, edumazet, kuba, pabeni, horms, hawk,
	toke

On 7/28/25 19:46, Pavel Begunkov wrote:
> On 7/28/25 18:44, Mina Almasry wrote:
>> On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
>>>
> 
> ...>> - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
>>> + * Return: the pointer to struct netmem_desc * regardless of its
>>> + * underlying type.
>>>    */
>>> -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
>>> +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
>>>   {
>>> -       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
>>> +       if (netmem_is_net_iov(netmem))
>>> +               return &((struct net_iov *)((__force unsigned long)netmem &
>>> +                                           ~NET_IOV))->desc;
>>> +
>>> +       return __netmem_to_nmdesc(netmem);
>>
>> The if statement generates overhead. I'd rather avoid it. We can
>> implement netmem_to_nmdesc like this, no?
>>
>> netmem_to_nmdesc(netmem_ref netmem)
>> {
>>    return (struct netmem_desc)((__force unsigned long)netmem & ~NET_IOV);
>> }
>>
>> Because netmem_desc is the first element in both net_iov and page for
>> the moment. (yes I know that will change eventually, but we don't have
>> to incur overhead of an extra if statement until netmem_desc is
>> removed from page, right?)
> 
> Same concern, but I think the goal here should be to make enough

s/make/give/


> info to the compiler to optimise it out without assumptions on
> the layouts nor NET_IOV_ASSERT_OFFSET. Currently it's not so bad,
> but we should be able to remove this test+cmove.
> 
>      movq    %rdi, %rax    # netmem, tmp105
>      andq    $-2, %rax    #, tmp105
>      testb    $1, %dil    #, netmem
>      cmove    %rdi, %rax    # tmp105,, netmem, <retval>
>      jmp    __x86_return_thunk

struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
{
	void *p = (void *)((__force unsigned long)netmem & ~NET_IOV);

	if (netmem_is_net_iov(netmem))
		return &((struct net_iov *)p)->desc;
	return __pp_page_to_nmdesc((struct page *)p);
}

movq	%rdi, %rax	# netmem, netmem
andq	$-2, %rax	#, netmem
jmp	__x86_return_thunk


This should do it, and if the layouts change, it'd still
remain correct.

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28 17:44 ` Mina Almasry
  2025-07-28 18:46   ` Pavel Begunkov
@ 2025-07-29  1:10   ` Byungchul Park
  1 sibling, 0 replies; 8+ messages in thread
From: Byungchul Park @ 2025-07-29  1:10 UTC (permalink / raw)
  To: Mina Almasry
  Cc: netdev, linux-kernel, davem, edumazet, kuba, pabeni, horms, hawk,
	toke, asml.silence, kernel_team

On Mon, Jul 28, 2025 at 10:44:31AM -0700, Mina Almasry wrote:
> On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
> >
> > Now that we have struct netmem_desc, it'd better access the pp fields
> > via struct netmem_desc rather than struct net_iov.
> >
> > Introduce netmem_to_nmdesc() for safely converting netmem_ref to
> > netmem_desc regardless of the type underneath e.i. netmem_desc, net_iov.
> >
> > While at it, remove __netmem_clear_lsb() and make netmem_to_nmdesc()
> > used instead.
> >
> > Signed-off-by: Byungchul Park <byungchul@sk.com>
> 
> Thank you for working on paying this tech debt!

I thought it was appropriate to organize the code I modified to some
extent.

> > ---
> >  include/net/netmem.h   | 33 ++++++++++++++++-----------------
> >  net/core/netmem_priv.h | 16 ++++++++--------
> >  2 files changed, 24 insertions(+), 25 deletions(-)
> >
> > diff --git a/include/net/netmem.h b/include/net/netmem.h
> > index f7dacc9e75fd..33ae444a9745 100644
> > --- a/include/net/netmem.h
> > +++ b/include/net/netmem.h
> > @@ -265,24 +265,23 @@ static inline struct netmem_desc *__netmem_to_nmdesc(netmem_ref netmem)
> >         return (__force struct netmem_desc *)netmem;
> >  }
> >
> > -/* __netmem_clear_lsb - convert netmem_ref to struct net_iov * for access to
> > - * common fields.
> > - * @netmem: netmem reference to extract as net_iov.
> > +/* netmem_to_nmdesc - convert netmem_ref to struct netmem_desc * for
> > + * access to common fields.
> > + * @netmem: netmem reference to get netmem_desc.
> >   *
> > - * All the sub types of netmem_ref (page, net_iov) have the same pp, pp_magic,
> > - * dma_addr, and pp_ref_count fields at the same offsets. Thus, we can access
> > - * these fields without a type check to make sure that the underlying mem is
> > - * net_iov or page.
> > + * All the sub types of netmem_ref (netmem_desc, net_iov) have the same
> > + * pp, pp_magic, dma_addr, and pp_ref_count fields via netmem_desc.
> >   *
> > - * The resulting value of this function can only be used to access the fields
> > - * that are NET_IOV_ASSERT_OFFSET'd. Accessing any other fields will result in
> > - * undefined behavior.
> > - *
> 
> I think instead of removing this warning, we want to add an
> NET_IOV_ASSERT_OFFSET that asserts that net_iov->netmem_desc and
> page->netmem_desc are in the same offset, and then add a note here
> that this works because we assert that the netmem_desc offset in both
> net_iov and page are the same.

It doesn't have to have the same offset.  Why do you want it?  Is it for
some optimizaiton?  Or I think it's unnecessary constraint.

> > - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
> > + * Return: the pointer to struct netmem_desc * regardless of its
> > + * underlying type.
> >   */
> > -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> > +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
> >  {
> > -       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
> > +       if (netmem_is_net_iov(netmem))
> > +               return &((struct net_iov *)((__force unsigned long)netmem &
> > +                                           ~NET_IOV))->desc;
> > +
> > +       return __netmem_to_nmdesc(netmem);
> 
> The if statement generates overhead. I'd rather avoid it. We can
> implement netmem_to_nmdesc like this, no?
> 
> netmem_to_nmdesc(netmem_ref netmem)
> {
>   return (struct netmem_desc)((__force unsigned long)netmem & ~NET_IOV);
> }

I see.  You want this kind of optimization.  I will do this way if you
want.

> Because netmem_desc is the first element in both net_iov and page for
> the moment. (yes I know that will change eventually, but we don't have
> to incur overhead of an extra if statement until netmem_desc is
> removed from page, right?)

Okay.

	Byungchul
> 
> 
> --
> Thanks,
> Mina

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-28 18:58     ` Pavel Begunkov
@ 2025-07-29  1:17       ` Byungchul Park
  2025-07-29  9:11         ` Pavel Begunkov
  0 siblings, 1 reply; 8+ messages in thread
From: Byungchul Park @ 2025-07-29  1:17 UTC (permalink / raw)
  To: Pavel Begunkov
  Cc: Mina Almasry, netdev, linux-kernel, davem, edumazet, kuba, pabeni,
	horms, hawk, toke, kernel_team

On Mon, Jul 28, 2025 at 07:58:13PM +0100, Pavel Begunkov wrote:
> On 7/28/25 19:46, Pavel Begunkov wrote:
> > On 7/28/25 18:44, Mina Almasry wrote:
> > > On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
> > > > 
> > 
> > ...>> - * Return: the netmem_ref cast to net_iov* regardless of its underlying type.
> > > > + * Return: the pointer to struct netmem_desc * regardless of its
> > > > + * underlying type.
> > > >    */
> > > > -static inline struct net_iov *__netmem_clear_lsb(netmem_ref netmem)
> > > > +static inline struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
> > > >   {
> > > > -       return (struct net_iov *)((__force unsigned long)netmem & ~NET_IOV);
> > > > +       if (netmem_is_net_iov(netmem))
> > > > +               return &((struct net_iov *)((__force unsigned long)netmem &
> > > > +                                           ~NET_IOV))->desc;
> > > > +
> > > > +       return __netmem_to_nmdesc(netmem);
> > > 
> > > The if statement generates overhead. I'd rather avoid it. We can
> > > implement netmem_to_nmdesc like this, no?
> > > 
> > > netmem_to_nmdesc(netmem_ref netmem)
> > > {
> > >    return (struct netmem_desc)((__force unsigned long)netmem & ~NET_IOV);
> > > }
> > > 
> > > Because netmem_desc is the first element in both net_iov and page for
> > > the moment. (yes I know that will change eventually, but we don't have
> > > to incur overhead of an extra if statement until netmem_desc is
> > > removed from page, right?)
> > 
> > Same concern, but I think the goal here should be to make enough
> 
> s/make/give/
> 
> 
> > info to the compiler to optimise it out without assumptions on
> > the layouts nor NET_IOV_ASSERT_OFFSET. Currently it's not so bad,
> > but we should be able to remove this test+cmove.
> > 
> >      movq    %rdi, %rax    # netmem, tmp105
> >      andq    $-2, %rax    #, tmp105
> >      testb    $1, %dil    #, netmem
> >      cmove    %rdi, %rax    # tmp105,, netmem, <retval>
> >      jmp    __x86_return_thunk
> 
> struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
> {
>        void *p = (void *)((__force unsigned long)netmem & ~NET_IOV);
> 
>        if (netmem_is_net_iov(netmem))
>                return &((struct net_iov *)p)->desc;
>        return __pp_page_to_nmdesc((struct page *)p);
> }

I wanted to remove constraints that can be removed, but Mina want not to
add additional overhead more.  So I'm thinking to keep the constraint,
'netmem_desc is the first member of net_iov'.

Thoughts?

	Byungchul

> movq    %rdi, %rax      # netmem, netmem
> andq    $-2, %rax       #, netmem
> jmp     __x86_return_thunk
> 
> 
> This should do it, and if the layouts change, it'd still
> remain correct.
> 
> --
> Pavel Begunkov

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc()
  2025-07-29  1:17       ` Byungchul Park
@ 2025-07-29  9:11         ` Pavel Begunkov
  0 siblings, 0 replies; 8+ messages in thread
From: Pavel Begunkov @ 2025-07-29  9:11 UTC (permalink / raw)
  To: Byungchul Park
  Cc: Mina Almasry, netdev, linux-kernel, davem, edumazet, kuba, pabeni,
	horms, hawk, toke, kernel_team

On 7/29/25 02:17, Byungchul Park wrote:
> On Mon, Jul 28, 2025 at 07:58:13PM +0100, Pavel Begunkov wrote:
>> On 7/28/25 19:46, Pavel Begunkov wrote:
>>> On 7/28/25 18:44, Mina Almasry wrote:
>>>> On Sun, Jul 27, 2025 at 9:21 PM Byungchul Park <byungchul@sk.com> wrote:
...>>
>>
>>> info to the compiler to optimise it out without assumptions on
>>> the layouts nor NET_IOV_ASSERT_OFFSET. Currently it's not so bad,
>>> but we should be able to remove this test+cmove.
>>>
>>>       movq    %rdi, %rax    # netmem, tmp105
>>>       andq    $-2, %rax    #, tmp105
>>>       testb    $1, %dil    #, netmem
>>>       cmove    %rdi, %rax    # tmp105,, netmem, <retval>
>>>       jmp    __x86_return_thunk
>>
>> struct netmem_desc *netmem_to_nmdesc(netmem_ref netmem)
>> {
>>         void *p = (void *)((__force unsigned long)netmem & ~NET_IOV);
>>
>>         if (netmem_is_net_iov(netmem))
>>                 return &((struct net_iov *)p)->desc;
>>         return __pp_page_to_nmdesc((struct page *)p);
>> }
> 
> I wanted to remove constraints that can be removed, but Mina want not to
> add additional overhead more.  So I'm thinking to keep the constraint,
> 'netmem_desc is the first member of net_iov'.
> 
> Thoughts?

We don't want extra overhead, the function above doesn't add
any, so just use it. You don't need to explicitly check that
layouts match.

-- 
Pavel Begunkov


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2025-07-29  9:10 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-28  4:20 [RFC net-next] netmem: replace __netmem_clear_lsb() with netmem_to_nmdesc() Byungchul Park
2025-07-28  5:35 ` Byungchul Park
2025-07-28 17:44 ` Mina Almasry
2025-07-28 18:46   ` Pavel Begunkov
2025-07-28 18:58     ` Pavel Begunkov
2025-07-29  1:17       ` Byungchul Park
2025-07-29  9:11         ` Pavel Begunkov
2025-07-29  1:10   ` Byungchul Park

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).