All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stanislav Fomichev <stfomichev@gmail.com>
To: Mina Almasry <almasrymina@google.com>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	linux-doc@vger.kernel.org, kvm@vger.kernel.org,
	virtualization@lists.linux.dev, linux-kselftest@vger.kernel.org,
	"David S. Miller" <davem@davemloft.net>,
	"Eric Dumazet" <edumazet@google.com>,
	"Jakub Kicinski" <kuba@kernel.org>,
	"Paolo Abeni" <pabeni@redhat.com>,
	"Simon Horman" <horms@kernel.org>,
	"Donald Hunter" <donald.hunter@gmail.com>,
	"Jonathan Corbet" <corbet@lwn.net>,
	"Andrew Lunn" <andrew+netdev@lunn.ch>,
	"David Ahern" <dsahern@kernel.org>,
	"Stefan Hajnoczi" <stefanha@redhat.com>,
	"Stefano Garzarella" <sgarzare@redhat.com>,
	"Michael S. Tsirkin" <mst@redhat.com>,
	"Jason Wang" <jasowang@redhat.com>,
	"Xuan Zhuo" <xuanzhuo@linux.alibaba.com>,
	"Eugenio Pérez" <eperezma@redhat.com>,
	"Shuah Khan" <shuah@kernel.org>,
	sdf@fomichev.me, asml.silence@gmail.com, dw@davidwei.uk,
	"Jamal Hadi Salim" <jhs@mojatatu.com>,
	"Victor Nogueira" <victor@mojatatu.com>,
	"Pedro Tammela" <pctammela@mojatatu.com>
Subject: Re: [PATCH RFC net-next v2 2/6] selftests: ncdevmem: Implement devmem TCP TX
Date: Fri, 31 Jan 2025 09:41:50 -0800	[thread overview]
Message-ID: <Z50LXqkUwTBOdBe4@mini-arch> (raw)
In-Reply-To: <CAHS8izMMm--CSCm1c9Ud1WdjxLeCXdNiqLzjeM_ACgKUP35O0w@mail.gmail.com>

On 01/30, Mina Almasry wrote:
> On Thu, Jan 30, 2025 at 3:05 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> >
> > On 01/30, Mina Almasry wrote:
> > > Add support for devmem TX in ncdevmem.
> > >
> > > This is a combination of the ncdevmem from the devmem TCP series RFCv1
> > > which included the TX path, and work by Stan to include the netlink API
> > > and refactored on top of his generic memory_provider support.
> > >
> > > Signed-off-by: Mina Almasry <almasrymina@google.com>
> > > Signed-off-by: Stanislav Fomichev <sdf@fomichev.me>
> > >
> > > ---
> > >
> > > v2:
> > > - make errors a static variable so that we catch instances where there
> > >   are fewer than 20 errors across different buffers.
> > > - Fix the issue where the seed is reset to 0 instead of its starting
> > >   value 1.
> > > - Use 1000ULL instead of 1000 to guard against overflow (Willem).
> > > - Do not set POLLERR (Willem).
> > > - Update the test to use the new interface where iov_base is the
> > >   dmabuf_offset.
> > > - Update the test to send 2 iov instead of 1, so we get some test
> > >   coverage over sending multiple iovs at once.
> > > - Print the ifindex the test is using; this is useful for debugging
> > >   cases where the test fails because the ifindex of the socket is
> > >   different from that of the dmabuf binding.
> > > ---
> > >  .../selftests/drivers/net/hw/ncdevmem.c       | 276 +++++++++++++++++-
> > >  1 file changed, 272 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> > > index 19a6969643f4..8455f19ecd1a 100644
> > > --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> > > +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> > > @@ -40,15 +40,18 @@
> > >  #include <fcntl.h>
> > >  #include <malloc.h>
> > >  #include <error.h>
> > > +#include <poll.h>
> > >
> > >  #include <arpa/inet.h>
> > >  #include <sys/socket.h>
> > >  #include <sys/mman.h>
> > >  #include <sys/ioctl.h>
> > >  #include <sys/syscall.h>
> > > +#include <sys/time.h>
> > >
> > >  #include <linux/memfd.h>
> > >  #include <linux/dma-buf.h>
> > > +#include <linux/errqueue.h>
> > >  #include <linux/udmabuf.h>
> > >  #include <libmnl/libmnl.h>
> > >  #include <linux/types.h>
> > > @@ -80,6 +83,8 @@ static int num_queues = -1;
> > >  static char *ifname;
> > >  static unsigned int ifindex;
> > >  static unsigned int dmabuf_id;
> > > +static uint32_t tx_dmabuf_id;
> > > +static int waittime_ms = 500;
> > >
> > >  struct memory_buffer {
> > >       int fd;
> > > @@ -93,6 +98,8 @@ struct memory_buffer {
> > >  struct memory_provider {
> > >       struct memory_buffer *(*alloc)(size_t size);
> > >       void (*free)(struct memory_buffer *ctx);
> > > +     void (*memcpy_to_device)(struct memory_buffer *dst, size_t off,
> > > +                              void *src, int n);
> > >       void (*memcpy_from_device)(void *dst, struct memory_buffer *src,
> > >                                  size_t off, int n);
> > >  };
> > > @@ -153,6 +160,20 @@ static void udmabuf_free(struct memory_buffer *ctx)
> > >       free(ctx);
> > >  }
> > >
> > > +static void udmabuf_memcpy_to_device(struct memory_buffer *dst, size_t off,
> > > +                                  void *src, int n)
> > > +{
> > > +     struct dma_buf_sync sync = {};
> > > +
> > > +     sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE;
> > > +     ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
> > > +
> > > +     memcpy(dst->buf_mem + off, src, n);
> > > +
> > > +     sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE;
> > > +     ioctl(dst->fd, DMA_BUF_IOCTL_SYNC, &sync);
> > > +}
> > > +
> > >  static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
> > >                                      size_t off, int n)
> > >  {
> > > @@ -170,6 +191,7 @@ static void udmabuf_memcpy_from_device(void *dst, struct memory_buffer *src,
> > >  static struct memory_provider udmabuf_memory_provider = {
> > >       .alloc = udmabuf_alloc,
> > >       .free = udmabuf_free,
> > > +     .memcpy_to_device = udmabuf_memcpy_to_device,
> > >       .memcpy_from_device = udmabuf_memcpy_from_device,
> > >  };
> > >
> > > @@ -188,7 +210,7 @@ void validate_buffer(void *line, size_t size)
> > >  {
> > >       static unsigned char seed = 1;
> > >       unsigned char *ptr = line;
> > > -     int errors = 0;
> > > +     static int errors;
> > >       size_t i;
> > >
> > >       for (i = 0; i < size; i++) {
> > > @@ -202,7 +224,7 @@ void validate_buffer(void *line, size_t size)
> > >               }
> > >               seed++;
> > >               if (seed == do_validation)
> > > -                     seed = 0;
> > > +                     seed = 1;
> > >       }
> > >
> > >       fprintf(stdout, "Validated buffer\n");
> > > @@ -394,6 +416,49 @@ static int bind_rx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
> > >       return -1;
> > >  }
> > >
> > > +static int bind_tx_queue(unsigned int ifindex, unsigned int dmabuf_fd,
> > > +                      struct ynl_sock **ys)
> > > +{
> > > +     struct netdev_bind_tx_req *req = NULL;
> > > +     struct netdev_bind_tx_rsp *rsp = NULL;
> > > +     struct ynl_error yerr;
> > > +
> > > +     *ys = ynl_sock_create(&ynl_netdev_family, &yerr);
> > > +     if (!*ys) {
> > > +             fprintf(stderr, "YNL: %s\n", yerr.msg);
> > > +             return -1;
> > > +     }
> > > +
> > > +     req = netdev_bind_tx_req_alloc();
> > > +     netdev_bind_tx_req_set_ifindex(req, ifindex);
> > > +     netdev_bind_tx_req_set_fd(req, dmabuf_fd);
> > > +
> > > +     rsp = netdev_bind_tx(*ys, req);
> > > +     if (!rsp) {
> > > +             perror("netdev_bind_tx");
> > > +             goto err_close;
> > > +     }
> > > +
> > > +     if (!rsp->_present.id) {
> > > +             perror("id not present");
> > > +             goto err_close;
> > > +     }
> > > +
> > > +     fprintf(stderr, "got tx dmabuf id=%d\n", rsp->id);
> > > +     tx_dmabuf_id = rsp->id;
> > > +
> > > +     netdev_bind_tx_req_free(req);
> > > +     netdev_bind_tx_rsp_free(rsp);
> > > +
> > > +     return 0;
> > > +
> > > +err_close:
> > > +     fprintf(stderr, "YNL failed: %s\n", (*ys)->err.msg);
> > > +     netdev_bind_tx_req_free(req);
> > > +     ynl_sock_destroy(*ys);
> > > +     return -1;
> > > +}
> > > +
> > >  static void enable_reuseaddr(int fd)
> > >  {
> > >       int opt = 1;
> > > @@ -432,7 +497,7 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
> > >       return 0;
> > >  }
> > >
> > > -int do_server(struct memory_buffer *mem)
> > > +static int do_server(struct memory_buffer *mem)
> > >  {
> > >       char ctrl_data[sizeof(int) * 20000];
> > >       struct netdev_queue_id *queues;
> > > @@ -686,6 +751,207 @@ void run_devmem_tests(void)
> > >       provider->free(mem);
> > >  }
> > >
> > > +static uint64_t gettimeofday_ms(void)
> > > +{
> > > +     struct timeval tv;
> > > +
> > > +     gettimeofday(&tv, NULL);
> > > +     return (tv.tv_sec * 1000ULL) + (tv.tv_usec / 1000ULL);
> > > +}
> > > +
> > > +static int do_poll(int fd)
> > > +{
> > > +     struct pollfd pfd;
> > > +     int ret;
> > > +
> > > +     pfd.revents = 0;
> > > +     pfd.fd = fd;
> > > +
> > > +     ret = poll(&pfd, 1, waittime_ms);
> > > +     if (ret == -1)
> > > +             error(1, errno, "poll");
> > > +
> > > +     return ret && (pfd.revents & POLLERR);
> > > +}
> > > +
> > > +static void wait_compl(int fd)
> > > +{
> > > +     int64_t tstop = gettimeofday_ms() + waittime_ms;
> > > +     char control[CMSG_SPACE(100)] = {};
> > > +     struct sock_extended_err *serr;
> > > +     struct msghdr msg = {};
> > > +     struct cmsghdr *cm;
> > > +     int retries = 10;
> > > +     __u32 hi, lo;
> > > +     int ret;
> > > +
> > > +     msg.msg_control = control;
> > > +     msg.msg_controllen = sizeof(control);
> > > +
> > > +     while (gettimeofday_ms() < tstop) {
> > > +             if (!do_poll(fd))
> > > +                     continue;
> > > +
> > > +             ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
> > > +             if (ret < 0) {
> > > +                     if (errno == EAGAIN)
> > > +                             continue;
> > > +                     error(1, ret, "recvmsg(MSG_ERRQUEUE)");
> > > +                     return;
> > > +             }
> > > +             if (msg.msg_flags & MSG_CTRUNC)
> > > +                     error(1, 0, "MSG_CTRUNC\n");
> > > +
> > > +             for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
> > > +                     if (cm->cmsg_level != SOL_IP &&
> > > +                         cm->cmsg_level != SOL_IPV6)
> > > +                             continue;
> > > +                     if (cm->cmsg_level == SOL_IP &&
> > > +                         cm->cmsg_type != IP_RECVERR)
> > > +                             continue;
> > > +                     if (cm->cmsg_level == SOL_IPV6 &&
> > > +                         cm->cmsg_type != IPV6_RECVERR)
> > > +                             continue;
> > > +
> > > +                     serr = (void *)CMSG_DATA(cm);
> > > +                     if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY)
> > > +                             error(1, 0, "wrong origin %u", serr->ee_origin);
> > > +                     if (serr->ee_errno != 0)
> > > +                             error(1, 0, "wrong errno %d", serr->ee_errno);
> > > +
> > > +                     hi = serr->ee_data;
> > > +                     lo = serr->ee_info;
> > > +
> > > +                     fprintf(stderr, "tx complete [%d,%d]\n", lo, hi);
> > > +                     return;
> > > +             }
> > > +     }
> > > +
> > > +     error(1, 0, "did not receive tx completion");
> > > +}
> > > +
> > > +static int do_client(struct memory_buffer *mem)
> > > +{
> > > +     char ctrl_data[CMSG_SPACE(sizeof(struct dmabuf_tx_cmsg))];
> > > +     struct sockaddr_in6 server_sin;
> > > +     struct sockaddr_in6 client_sin;
> > > +     struct dmabuf_tx_cmsg ddmabuf;
> > > +     struct ynl_sock *ys = NULL;
> > > +     struct msghdr msg = {};
> > > +     ssize_t line_size = 0;
> > > +     struct cmsghdr *cmsg;
> > > +     struct iovec iov[2];
> > > +     uint64_t off = 100;
> > > +     char *line = NULL;
> > > +     size_t len = 0;
> > > +     int socket_fd;
> > > +     int ret, mid;
> > > +     int opt = 1;
> > > +
> > > +     ret = parse_address(server_ip, atoi(port), &server_sin);
> > > +     if (ret < 0)
> > > +             error(1, 0, "parse server address");
> > > +
> > > +     socket_fd = socket(AF_INET6, SOCK_STREAM, 0);
> > > +     if (socket_fd < 0)
> > > +             error(1, socket_fd, "create socket");
> > > +
> > > +     enable_reuseaddr(socket_fd);
> > > +
> > > +     ret = setsockopt(socket_fd, SOL_SOCKET, SO_BINDTODEVICE, ifname,
> > > +                      strlen(ifname) + 1);
> > > +     if (ret)
> > > +             error(1, ret, "bindtodevice");
> > > +
> > > +     if (bind_tx_queue(ifindex, mem->fd, &ys))
> > > +             error(1, 0, "Failed to bind\n");
> > > +
> > > +     ret = parse_address(client_ip, atoi(port), &client_sin);
> > > +     if (ret < 0)
> > > +             error(1, 0, "parse client address");
> > > +
> > > +     ret = bind(socket_fd, &client_sin, sizeof(client_sin));
> > > +     if (ret)
> > > +             error(1, ret, "bind");
> > > +
> > > +     ret = setsockopt(socket_fd, SOL_SOCKET, SO_ZEROCOPY, &opt, sizeof(opt));
> > > +     if (ret)
> > > +             error(1, ret, "set sock opt");
> > > +
> > > +     fprintf(stderr, "Connect to %s %d (via %s)\n", server_ip,
> > > +             ntohs(server_sin.sin6_port), ifname);
> > > +
> > > +     ret = connect(socket_fd, &server_sin, sizeof(server_sin));
> > > +     if (ret)
> > > +             error(1, ret, "connect");
> > > +
> > > +     while (1) {
> > > +             free(line);
> > > +             line = NULL;
> > > +             /* Subtract 1 from line_size to drop the trailing newline
> > > +              * that getline() is sure to include...
> > > +              */
> > > +             line_size = getline(&line, &len, stdin) - 1;
> >
> > Why not send the '\n' as well? If we skip the '\n', it's not keeping
> > netcat-like behavior :-(
> >
> 
> Ah, this is to make the validation on the RX side work. The validation
> expects a repeating pattern:
> 
> 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, ....
> 
> With no newlines.
> 
> But it does become weird that TX doesn't match netcat. Let me think on
> this a bit. Maybe I can resolve this in a way where the validation
> works but also the tx side behaves like netcat. Maybe the RX
> validation can skip newlines or something. Maybe I can massage how I
> invoke the test.
> 
> This can become a rabbit hole because I do want to invoke multiple
> sendmsg() in one iteration of the test as well, and not overcomplicate
> the series.

Then let's do it (skip the '\n') for validation mode only? FWIW, my existing
(python wrapper) tests pass with your series. I'll be sending a bunch of minor
comments about the selftest part separately shortly.

  reply	other threads:[~2025-01-31 17:41 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-30 21:15 [PATCH RFC net-next v2 0/6] Device memory TCP TX Mina Almasry
2025-01-30 21:15 ` [PATCH RFC net-next v2 1/6] net: add devmem TCP TX documentation Mina Almasry
2025-01-30 22:59   ` Stanislav Fomichev
2025-01-30 23:22     ` Mina Almasry
2025-01-31 17:39       ` Stanislav Fomichev
2025-01-30 21:15 ` [PATCH RFC net-next v2 2/6] selftests: ncdevmem: Implement devmem TCP TX Mina Almasry
2025-01-30 23:05   ` Stanislav Fomichev
2025-01-30 23:29     ` Mina Almasry
2025-01-31 17:41       ` Stanislav Fomichev [this message]
2025-01-31 17:45   ` Stanislav Fomichev
2025-01-30 21:15 ` [PATCH RFC net-next v2 3/6] net: add get_netmem/put_netmem support Mina Almasry
2025-01-30 21:15 ` [PATCH RFC net-next v2 4/6] net: devmem: TCP tx netlink api Mina Almasry
2025-01-31  8:29   ` Simon Horman
2025-01-30 21:15 ` [PATCH RFC net-next v2 5/6] net: devmem: Implement TX path Mina Almasry
2025-01-30 23:07   ` Stanislav Fomichev
2025-01-31  9:09   ` Simon Horman
2025-01-30 21:15 ` [PATCH RFC net-next v2 6/6] net: devmem: make dmabuf unbinding scheduled work Mina Almasry

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z50LXqkUwTBOdBe4@mini-arch \
    --to=stfomichev@gmail.com \
    --cc=almasrymina@google.com \
    --cc=andrew+netdev@lunn.ch \
    --cc=asml.silence@gmail.com \
    --cc=corbet@lwn.net \
    --cc=davem@davemloft.net \
    --cc=donald.hunter@gmail.com \
    --cc=dsahern@kernel.org \
    --cc=dw@davidwei.uk \
    --cc=edumazet@google.com \
    --cc=eperezma@redhat.com \
    --cc=horms@kernel.org \
    --cc=jasowang@redhat.com \
    --cc=jhs@mojatatu.com \
    --cc=kuba@kernel.org \
    --cc=kvm@vger.kernel.org \
    --cc=linux-doc@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=pctammela@mojatatu.com \
    --cc=sdf@fomichev.me \
    --cc=sgarzare@redhat.com \
    --cc=shuah@kernel.org \
    --cc=stefanha@redhat.com \
    --cc=victor@mojatatu.com \
    --cc=virtualization@lists.linux.dev \
    --cc=xuanzhuo@linux.alibaba.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.