All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stanislav Fomichev <sdf.kernel@gmail.com>
To: Bobby Eshleman <bobbyeshleman@gmail.com>
Cc: "Donald Hunter" <donald.hunter@gmail.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Simon Horman" <horms@kernel.org>,
	"Andrew Lunn" <andrew+netdev@lunn.ch>,
	"Gerd Hoffmann" <kraxel@redhat.com>,
	"Vivek Kasireddy" <vivek.kasireddy@intel.com>,
	"Sumit Semwal" <sumit.semwal@linaro.org>,
	"Christian König" <christian.koenig@amd.com>,
	"Shuah Khan" <shuah@kernel.org>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	dri-devel@lists.freedesktop.org, linux-media@vger.kernel.org,
	linaro-mm-sig@lists.linaro.org, linux-kselftest@vger.kernel.org,
	sdf@fomichev.me, razor@blackwall.org, daniel@iogearbox.net,
	almasrymina@google.com, matttbe@kernel.org, skhawaja@google.com,
	dw@davidwei.uk, "Bobby Eshleman" <bobbyeshleman@meta.com>
Subject: Re: [PATCH net-next v2 3/4] selftests/net: ncdevmem: add -b option to set rx-buf-size on bind
Date: Thu, 11 Jun 2026 14:22:54 -0700	[thread overview]
Message-ID: <aislm0fS2MSDMzKf@devvm7509.cco0.facebook.com> (raw)
In-Reply-To: <20260611-tcpdm-large-niovs-v2-3-ee2bf15e7523@meta.com>

On 06/11, Bobby Eshleman wrote:
> From: Bobby Eshleman <bobbyeshleman@meta.com>
> 
> Add -b <bytes> to request a non-default niov size via
> NETDEV_A_DMABUF_RX_BUF_SIZE. When the value exceeds PAGE_SIZE,
> udmabuf_alloc() switches to an MFD_HUGETLB-backed memfd so each 2 MB
> hugepage produces one naturally-aligned sg entry.
> 
> Reject values > 2 MB up front: MFD_HUGETLB + udmabuf can only guarantee
> 2 MB per sg entry (one hugepage), so a larger rx_buf_size would fail the
> per-sg length/alignment check.
> 
> Add CONFIG_HUGETLBFS=y to drivers/net/hw/config so the new path is
> reachable in the CI kernels built for these tests.
> 
> Signed-off-by: Bobby Eshleman <bobbyeshleman@meta.com>
> ---
>  tools/testing/selftests/drivers/net/hw/config     |  1 +
>  tools/testing/selftests/drivers/net/hw/ncdevmem.c | 49 +++++++++++++++++++++--
>  2 files changed, 47 insertions(+), 3 deletions(-)
> 
> diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config
> index cd20024218cd..ed8642b68094 100644
> --- a/tools/testing/selftests/drivers/net/hw/config
> +++ b/tools/testing/selftests/drivers/net/hw/config
> @@ -3,6 +3,7 @@ CONFIG_FAIL_FUNCTION=y
>  CONFIG_FAULT_INJECTION=y
>  CONFIG_FAULT_INJECTION_DEBUG_FS=y
>  CONFIG_FUNCTION_ERROR_INJECTION=y
> +CONFIG_HUGETLBFS=y
>  CONFIG_INET6_ESP=y
>  CONFIG_INET6_ESP_OFFLOAD=y
>  CONFIG_INET_ESP=y
> diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> index d96e8a3b5a65..325c128191e2 100644
> --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> @@ -61,6 +61,7 @@
>  #include <sys/time.h>
>  
>  #include <linux/memfd.h>
> +#include <sys/param.h>
>  #include <linux/dma-buf.h>
>  #include <linux/errqueue.h>
>  #include <linux/udmabuf.h>
> @@ -79,6 +80,7 @@
>  #define PAGE_SHIFT 12
>  #define TEST_PREFIX "ncdevmem"
>  #define NUM_PAGES 16000
> +#define MB(x) ((x) << 20)
>  
>  #ifndef MSG_SOCK_DEVMEM
>  #define MSG_SOCK_DEVMEM 0x2000000
> @@ -100,6 +102,7 @@ static unsigned int dmabuf_id;
>  static uint32_t tx_dmabuf_id;
>  static int waittime_ms = 500;
>  static bool fail_on_linear;
> +static uint32_t rx_buf_size;
>  
>  /* System state loaded by current_config_load() */
>  #define MAX_FLOWS	8
> @@ -142,6 +145,7 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
>  {
>  	struct udmabuf_create create;
>  	struct memory_buffer *ctx;
> +	unsigned int memfd_flags;
>  	int ret;
>  
>  	ctx = malloc(sizeof(*ctx));
> @@ -156,9 +160,14 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
>  		goto err_free_ctx;
>  	}
>  
> -	ctx->memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
> +	memfd_flags = MFD_ALLOW_SEALING;

[..]

> +	if (rx_buf_size > (uint32_t)getpagesize())

What's the logic behind explicit (uint32_t) cast? uint vs int
comparisons should promote the int to uint automatically?

> +		memfd_flags |= MFD_HUGETLB | MFD_HUGE_2MB;
> +
> +	ctx->memfd = memfd_create("udmabuf-test", memfd_flags);
>  	if (ctx->memfd < 0) {
> -		pr_err("[skip,no-memfd]");
> +		pr_err("[skip,no-memfd%s]",
> +		       (memfd_flags & MFD_HUGETLB) ? " (need hugepages)" : "");
>  		goto err_close_dev;
>  	}
>  
> @@ -168,6 +177,11 @@ static struct memory_buffer *udmabuf_alloc(size_t size)
>  		goto err_close_memfd;
>  	}
>  
> +	if (memfd_flags & MFD_HUGETLB) {
> +		size = roundup(size, MB(2));
> +		ctx->size = size;
> +	}
> +
>  	ret = ftruncate(ctx->memfd, size);
>  	if (ret == -1) {
>  		pr_err("[FAIL,memfd-truncate]");
> @@ -699,6 +713,8 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
>  	netdev_bind_rx_req_set_ifindex(req, ifindex);
>  	netdev_bind_rx_req_set_fd(req, dmabuf_fd);
>  	__netdev_bind_rx_req_set_queues(req, queues, n_queue_index);
> +	if (rx_buf_size)
> +		netdev_bind_rx_req_set_rx_buf_size(req, rx_buf_size);
>  
>  	rsp = netdev_bind_rx(*ys, req);
>  	if (!rsp) {
> @@ -1411,7 +1427,7 @@ int main(int argc, char *argv[])
>  	int is_server = 0, opt;
>  	int ret, err = 1;
>  
> -	while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:n")) != -1) {
> +	while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:nb:")) != -1) {
>  		switch (opt) {
>  		case 'L':
>  			fail_on_linear = true;
> @@ -1446,6 +1462,33 @@ int main(int argc, char *argv[])
>  		case 'n':
>  			skip_config = 1;
>  			break;
> +		case 'b': {
> +			char *endp;
> +			unsigned long val;

Christmas tree here as well?

> +
> +			errno = 0;
> +			val = strtoul(optarg, &endp, 0);

[..]

> +			if (errno || endp == optarg || *endp || val == 0 ||
> +			    val > UINT32_MAX) {
> +				pr_err("invalid rx_buf_size: %s", optarg);
> +				return 1;
> +			}

This is too sophisticated :-/ Just (if val == UINT32_MAX && errno == ERANGE) ?
(you're looking for an overflow here supposedly?)

[..]

> +			if (val & (val - 1)) {
> +				pr_err("rx_buf_size must be a power of 2");
> +				return 1;
> +			}
> +			if (val < (unsigned long)getpagesize()) {
> +				pr_err("rx_buf_size must be >= PAGE_SIZE (%d)",
> +				       getpagesize());
> +				return 1;
> +			}
> +			if (val > MB(2)) {
> +				pr_err("rx_buf_size > 2 MB not supported");
> +				return 1;
> +			}

We already check these on the kernel size, so should be ok to drop? 

  reply	other threads:[~2026-06-11 21:23 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-11 15:28 [PATCH net-next v2 0/4] net: devmem: allow rx-buf-size > PAGE_SIZE per binding Bobby Eshleman
2026-06-11 15:28 ` [PATCH net-next v2 1/4] net: devmem: allow rx-buf-size > PAGE_SIZE per dmabuf binding Bobby Eshleman
2026-06-11 21:12   ` Stanislav Fomichev
2026-06-11 15:28 ` [PATCH net-next v2 2/4] udmabuf: emit one sg entry per pinned folio Bobby Eshleman
2026-06-11 15:28 ` [PATCH net-next v2 3/4] selftests/net: ncdevmem: add -b option to set rx-buf-size on bind Bobby Eshleman
2026-06-11 21:22   ` Stanislav Fomichev [this message]
2026-06-11 21:58     ` Bobby Eshleman
2026-06-11 15:28 ` [PATCH net-next v2 4/4] selftests/net: devmem.py: add check_rx_large_niov Bobby Eshleman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aislm0fS2MSDMzKf@devvm7509.cco0.facebook.com \
    --to=sdf.kernel@gmail.com \
    --cc=almasrymina@google.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=bobbyeshleman@gmail.com \
    --cc=bobbyeshleman@meta.com \
    --cc=christian.koenig@amd.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=dri-devel@lists.freedesktop.org \
    --cc=dw@davidwei.uk \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kraxel@redhat.com \
    --cc=kuba@kernel.org \
    --cc=linaro-mm-sig@lists.linaro.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-media@vger.kernel.org \
    --cc=matttbe@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=razor@blackwall.org \
    --cc=sdf@fomichev.me \
    --cc=shuah@kernel.org \
    --cc=skhawaja@google.com \
    --cc=sumit.semwal@linaro.org \
    --cc=vivek.kasireddy@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.