Netdev List
 help / color / mirror / Atom feed
From: Allison Henderson <achender@kernel.org>
To: Breno Leitao <leitao@debian.org>,
	"David S. Miller" <davem@davemloft.net>,
	 Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	 Simon Horman <horms@kernel.org>, Shuah Khan <shuah@kernel.org>
Cc: linux-kernel@vger.kernel.org, netdev@vger.kernel.org,
	 linux-rdma@vger.kernel.org, rds-devel@oss.oracle.com,
	 linux-kselftest@vger.kernel.org, kernel-team@meta.com
Subject: Re: [PATCH net-next 2/2] selftests: net: rds: add getsockopt() conversion test
Date: Fri, 05 Jun 2026 02:56:09 -0700	[thread overview]
Message-ID: <134c50a1ca2d2f2bc06336072c89ff011a9842cb.camel@kernel.org> (raw)
In-Reply-To: <20260603-getsock_more-v1-2-43b8d40c8849@debian.org>

On Wed, 2026-06-03 at 09:11 -0700, Breno Leitao wrote:
> Add a kselftest that exercises the RDS getsockopt() paths converted to
> the getsockopt_iter() / sockopt_t callback:
> 
> - RDS_RECVERR and SO_RDS_TRANSPORT, which return their int value through
>   copy_to_iter() and report the written length in opt->optlen.
> 
> - RDS_INFO_*, which obtains the userspace buffer pages with
>   iov_iter_extract_pages() (including a non-zero starting page offset)
>   and lets the info producers copy the snapshot in under a spinlock.
> 
> Signed-off-by: Breno Leitao <leitao@debian.org>

Hi Breno,

Thanks for working on this.  Your test covers a lot of socket behavior that the
existing packet test doesn't get into, so I definitely think it's worth adding.
I ran it locally under vng and it passes for me.  I did notice one nit below, but
otherwise I think it looks really good.


> ---
>  tools/testing/selftests/net/rds/.gitignore   |   1 +
>  tools/testing/selftests/net/rds/Makefile     |   4 +
>  tools/testing/selftests/net/rds/getsockopt.c | 201 +++++++++++++++++++++++++++
>  3 files changed, 206 insertions(+)
> 
> diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore
> index 1c6f04e2aa11..7ca4b1440f51 100644
> --- a/tools/testing/selftests/net/rds/.gitignore
> +++ b/tools/testing/selftests/net/rds/.gitignore
> @@ -1 +1,2 @@
>  include.sh
> +getsockopt
> diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
> index fe363be8e358..0700d8298eec 100644
> --- a/tools/testing/selftests/net/rds/Makefile
> +++ b/tools/testing/selftests/net/rds/Makefile
> @@ -5,6 +5,8 @@ all:
>  
>  TEST_PROGS := run.sh
>  
> +TEST_GEN_PROGS := getsockopt
> +
>  TEST_FILES := \
>  	include.sh \
>  	settings \
> @@ -16,4 +18,6 @@ EXTRA_CLEAN := \
>  	/tmp/rds_logs \
>  # end of EXTRA_CLEAN
>  
> +CFLAGS += $(KHDR_INCLUDES)
> +
>  include ../../lib.mk
> diff --git a/tools/testing/selftests/net/rds/getsockopt.c b/tools/testing/selftests/net/rds/getsockopt.c
> new file mode 100644
> index 000000000000..a82ffe4871c2
> --- /dev/null
> +++ b/tools/testing/selftests/net/rds/getsockopt.c
> @@ -0,0 +1,201 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Exercise the RDS getsockopt() paths that were converted to the
> + * getsockopt_iter() / sockopt_t callback.
> + *
> + * Three distinct paths are covered:
> + *
> + *   - RDS_RECVERR and SO_RDS_TRANSPORT, which now return their int value
> + *     through copy_to_iter() and report the written length in opt->optlen.
> + *
> + *   - RDS_INFO_*, which pins the userspace buffer with
> + *     iov_iter_extract_pages() (including a non-zero starting page offset)
> + *     and lets the info producers memcpy the snapshot in under a spinlock.
> + *
> + * The kvec (in-kernel buffer) -> -EOPNOTSUPP path of rds_info_getsockopt()
> + * is not reachable from a userspace getsockopt() and so is not tested here.
> + */
> +#include <errno.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/mman.h>
> +#include <sys/socket.h>
> +#include <linux/rds.h>
> +
> +#include "../../kselftest_harness.h"
> +
> +#ifndef AF_RDS
> +#define AF_RDS 21
> +#endif
> +
> +FIXTURE(rds) {
> +	int fd;
> +};
> +
> +FIXTURE_SETUP(rds)
> +{
> +	self->fd = socket(AF_RDS, SOCK_SEQPACKET, 0);
> +	if (self->fd < 0)
> +		SKIP(return, "AF_RDS unavailable (errno %d) - load the rds module",
> +		     errno);
> +}
> +
> +FIXTURE_TEARDOWN(rds)
> +{
> +	if (self->fd >= 0)
> +		close(self->fd);
> +}
> +
> +/* RDS_RECVERR defaults to 0 and is reported back as a 4-byte int. */
> +TEST_F(rds, recverr_default)
> +{
> +	socklen_t len = sizeof(int);
> +	int val = 0xdeadbeef;
> +
> +	ASSERT_EQ(0, getsockopt(self->fd, SOL_RDS, RDS_RECVERR, &val, &len));
> +	EXPECT_EQ(sizeof(int), len);
> +	EXPECT_EQ(0, val);
> +}
> +
> +/* A value set via setsockopt() must be readable back unchanged. */
> +TEST_F(rds, recverr_set_get)
> +{
> +	socklen_t len = sizeof(int);
> +	int val = 1;
> +
> +	ASSERT_EQ(0, setsockopt(self->fd, SOL_RDS, RDS_RECVERR, &val, len));
> +
> +	val = 0;
> +	ASSERT_EQ(0, getsockopt(self->fd, SOL_RDS, RDS_RECVERR, &val, &len));
> +	EXPECT_EQ(sizeof(int), len);
> +	EXPECT_EQ(1, val);
> +}
> +
> +/* A buffer smaller than an int is rejected with EINVAL, not silently. */
> +TEST_F(rds, recverr_short_buffer)
> +{
> +	socklen_t len = sizeof(int) - 1;
> +	char buf[sizeof(int)];
> +
> +	EXPECT_EQ(-1, getsockopt(self->fd, SOL_RDS, RDS_RECVERR, buf, &len));
> +	EXPECT_EQ(EINVAL, errno);
> +}
> +
> +/* An unbound socket reports RDS_TRANS_NONE for SO_RDS_TRANSPORT. */
> +TEST_F(rds, transport_unbound)
> +{
> +	socklen_t len = sizeof(int);
> +	int val = 0;
> +
> +	ASSERT_EQ(0, getsockopt(self->fd, SOL_RDS, SO_RDS_TRANSPORT, &val,
> +				&len));
> +	EXPECT_EQ(sizeof(int), len);
> +	EXPECT_EQ(RDS_TRANS_NONE, (unsigned int)val);
> +}
> +
> +TEST_F(rds, transport_short_buffer)
> +{
> +	socklen_t len = sizeof(int) - 1;
> +	char buf[sizeof(int)];
> +
> +	EXPECT_EQ(-1, getsockopt(self->fd, SOL_RDS, SO_RDS_TRANSPORT, buf,
> +				 &len));
> +	EXPECT_EQ(EINVAL, errno);
> +}
> +
> +/*
> + * RDS_INFO_COUNTERS with a zero-length buffer is the "probe" call: it must
> + * fail with ENOSPC and report the required snapshot size in optlen.
> + */
> +TEST_F(rds, info_counters_probe)
> +{
> +	socklen_t len = 0;
> +
> +	EXPECT_EQ(-1, getsockopt(self->fd, SOL_RDS, RDS_INFO_COUNTERS, NULL,
> +				 &len));
> +	EXPECT_EQ(ENOSPC, errno);
> +	EXPECT_GT(len, 0);
> +	/* The snapshot is an array of fixed-size counter records. */
> +	EXPECT_EQ(0, len % (socklen_t)sizeof(struct rds_info_counter));
> +}
> +
> +/*
> + * A real snapshot into an unaligned userspace buffer exercises the
> + * iov_iter_extract_pages() path, including the non-zero offset0 handling
> + * that the patch reworked. Place the buffer at a non-page-aligned address
> + * spanning into the next page to make sure multi-page pinning works too.
> + */
> +TEST_F(rds, info_counters_snapshot)
> +{
> +	struct rds_info_counter *ctr;
> +	socklen_t need = 0, len;
> +	long pagesz = sysconf(_SC_PAGESIZE);
> +	unsigned int i, n;
> +	char *region, *buf;
> +	int ret;
> +
> +	/* Probe for the required size. */
> +	getsockopt(self->fd, SOL_RDS, RDS_INFO_COUNTERS, NULL, &need);
> +	ASSERT_GT(need, 0);
> +
> +	region = mmap(NULL, 2 * pagesz, PROT_READ | PROT_WRITE,
> +		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

The 2-page mapping (and the ASSERT_LE guarding it) ties the test to the snapshot
fitting in two pages.  This works right now, but ideally the region here should
account for the number of counters that come back from RDS_INFO_COUNTERS so that
if the total number of counters were to grow later, we don't overrun a region
that's set to a fixed number of pages.  So in this case, we'd want the need + offset,
and then page align that so we can mmap it:

	offset	= pagesz - 64; 
	map_len	= ((offset + need + pagesz - 1) / pagesz) * pagesz;  /* round (off+need) up to whole pages */
	region  = mmap(NULL, map_len, ...);                 


> +	ASSERT_NE(MAP_FAILED, region);
> +
> +	/* Unaligned start that runs past the first page boundary. */
> +	buf = region + pagesz - 64;
Then this simplifies to:
	buf = region + offset;

> +	ASSERT_LE(need, (socklen_t)(pagesz + 64));
And then the ASSERT here can be removed

> +
> +	/*
> +	 * On success the RDS_INFO path returns the positive per-element size
> +	 * (lens.each) rather than 0, and writes the full snapshot length back
> +	 * into optlen.
> +	 */
> +	len = need;
> +	ret = getsockopt(self->fd, SOL_RDS, RDS_INFO_COUNTERS, buf, &len);
> +	ASSERT_GE(ret, 0) {
> +		TH_LOG("getsockopt snapshot failed: errno %d", errno);
> +	}
> +	EXPECT_EQ(sizeof(struct rds_info_counter), ret);
> +	EXPECT_EQ(need, len);
> +
> +	/* The counter names must be NUL-terminated, non-empty strings. */
> +	ctr = (struct rds_info_counter *)buf;
> +	n = len / sizeof(*ctr);
> +	ASSERT_GT(n, 0);
> +	for (i = 0; i < n; i++) {
> +		size_t namelen = strnlen((char *)ctr[i].name,
> +					 sizeof(ctr[i].name));
> +
> +		EXPECT_GT(namelen, 0);
> +		EXPECT_LT(namelen, sizeof(ctr[i].name));
> +	}
> +
> +	munmap(region, 2 * pagesz);

Finally this turns into:
        munmap(region, map_len);

That's the only thing that really stood out.  I think the rest of it looks really good.  Thanks for putting this
together!

Allison
> +}
> +
> +/*
> + * A non-zero but too-small buffer must report ENOSPC and the full required
> + * length, without corrupting memory past the buffer.
> + */
> +TEST_F(rds, info_counters_short_buffer)
> +{
> +	socklen_t need = 0, len;
> +	char small[sizeof(struct rds_info_counter)];
> +
> +	getsockopt(self->fd, SOL_RDS, RDS_INFO_COUNTERS, NULL, &need);
> +	ASSERT_GT(need, 0);
> +
> +	/* Ask with a buffer guaranteed smaller than the full snapshot. */
> +	if (need <= (socklen_t)sizeof(small))
> +		SKIP(return, "snapshot fits in one record; nothing to test");
> +
> +	len = 1; /* < sizeof(struct rds_info_counter) */
> +	EXPECT_EQ(-1, getsockopt(self->fd, SOL_RDS, RDS_INFO_COUNTERS, small,
> +				 &len));
> +	EXPECT_EQ(ENOSPC, errno);
> +	EXPECT_EQ(need, len);
> +}
> +
> +TEST_HARNESS_MAIN
> 


  reply	other threads:[~2026-06-05  9:56 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-06-03 16:11 [PATCH net-next 0/2] net: rds: convert rds to getsockopt_iter Breno Leitao
2026-06-03 16:11 ` [PATCH net-next 1/2] rds: convert " Breno Leitao
2026-06-05  2:20   ` Allison Henderson
2026-06-05 10:07     ` Breno Leitao
2026-06-03 16:11 ` [PATCH net-next 2/2] selftests: net: rds: add getsockopt() conversion test Breno Leitao
2026-06-05  9:56   ` Allison Henderson [this message]
2026-06-05 10:09     ` Breno Leitao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=134c50a1ca2d2f2bc06336072c89ff011a9842cb.camel@kernel.org \
    --to=achender@kernel.org \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=horms@kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuba@kernel.org \
    --cc=leitao@debian.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-rdma@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=rds-devel@oss.oracle.com \
    --cc=shuah@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox