All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@redhat.com>
To: David Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org, Chris Van Hoof <vanhoof@redhat.com>,
	Clark Williams <williams@redhat.com>
Subject: [RFC 0/2] New socket API: recvmmsg
Date: Wed, 20 May 2009 20:06:42 -0300	[thread overview]
Message-ID: <20090520230642.GA5956@ghostprotocols.net> (raw)

[-- Attachment #1: Type: text/plain, Size: 1841 bytes --]

Hi,

	The following two patches, which I cooked up today and haven't
properly benchmarked, implement a new socket syscall, recvmmsg, which
stands for "receive multiple messages" in one call.

	I implemented the attached program as a test case and to show
it in action, and lightly tested it using two clients (netcat) sending
big files from a machine with a 100 mbit/s NIC and another with a 1
Gbit/s NIC to a server with the patched kernel, output:

$ ./recvmmsg 5001 128
nr_datagrams received: 19
    4352 bytes received from doppio.ghostprotocols.net in 17 datagrams
    256 bytes received from filo.ghostprotocols.net in 1 datagrams
    256 bytes received from doppio.ghostprotocols.net in 1 datagrams
nr_datagrams received: 14
    2816 bytes received from doppio.ghostprotocols.net in 11 datagrams
    256 bytes received from filo.ghostprotocols.net in 1 datagrams
    512 bytes received from doppio.ghostprotocols.net in 2 datagrams
nr_datagrams received: 19
    2304 bytes received from doppio.ghostprotocols.net in 9 datagrams
    256 bytes received from filo.ghostprotocols.net in 1 datagrams
    2304 bytes received from doppio.ghostprotocols.net in 9 datagrams
nr_datagrams received: 14
    2816 bytes received from doppio.ghostprotocols.net in 11 datagrams
    256 bytes received from filo.ghostprotocols.net in 1 datagrams
    512 bytes received from doppio.ghostprotocols.net in 2 datagrams
nr_datagrams received: 19
    4608 bytes received from doppio.ghostprotocols.net in 18 datagrams
    256 bytes received from filo.ghostprotocols.net in 1 datagrams

filo is the machine with a 100 mbit/s NIC, obviously :-)

	There are some things I will probably change, like perhaps
pushing it deeper from socket to sock level, but I'd like to hear the
general feeling about at least the userspace interface.

Best Regards,

- Arnaldo

[-- Attachment #2: recvmmsg.c --]
[-- Type: text/plain, Size: 3187 bytes --]

#include <stdlib.h>
#include <syscall.h>
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <poll.h>
#include <string.h>

/*
 * One slot of the message vector handed to recvmmsg(): an ordinary msghdr
 * paired with a per-message length.
 */
struct mmsghdr {
	struct msghdr	msg_hdr;	/* name/iovec description of one message */
	unsigned int	msg_len;	/* read by main() as this datagram's byte count */
};

#if defined(__x86_64__) || defined(__i386__)
#include "linux-2.6-tip/arch/x86/include/asm/unistd.h"
#endif

/*
 * Userspace entry point for the recvmmsg syscall, which has no libc wrapper
 * here: forwards the four arguments unchanged through syscall(2).
 *
 * @fd:    socket descriptor to receive from
 * @mmsg:  array of message slots
 * @vlen:  number of slots in @mmsg
 * @flags: receive flags (main() passes MSG_DONTWAIT)
 *
 * Returns the value produced by syscall(), converted to int.
 */
static inline int recvmmsg(int fd, struct mmsghdr *mmsg,
			   unsigned int vlen, unsigned int flags)
{
	const long ret = syscall(__NR_recvmmsg, fd, mmsg, vlen, flags);

	return ret;
}

static void print_stats_peer(struct mmsghdr *datagram, int count, int bytes)
{
	char peer[1024];
	int err = getnameinfo(datagram->msg_hdr.msg_name,
			      datagram->msg_hdr.msg_namelen,
			      peer, sizeof(peer), NULL, 0, 0);
	if (err != 0) {
		fprintf(stderr, "error using getnameinfo: %s\n",
			gai_strerror(err));
			return;
		}
	printf("    %d bytes received from %s in %d datagrams\n",
	       bytes, peer, count);
}

/*
 * Test driver for the recvmmsg syscall.
 *
 * Binds an IPv4 UDP socket and then loops forever: waits for the socket to
 * become readable, receives up to batch_size datagrams with one recvmmsg()
 * call, and prints one summary line per run of consecutive datagrams that
 * share the same source address.
 *
 * Usage: recvmmsg [port [batch_size]]	(defaults: port "5001", batch of 8)
 *
 * Returns EXIT_FAILURE on invalid arguments or on any setup/receive error;
 * the receive loop has no successful exit path.
 */
int main(int argc, char *argv[])
{
	/* Upper bound on the batch so the VLAs below stay a sane stack size. */
	enum { MAX_BATCH_SIZE = 1024 };
	struct addrinfo *host;
	struct addrinfo hints = {
		.ai_family   = AF_INET,
		.ai_socktype = SOCK_DGRAM,
		.ai_protocol = IPPROTO_UDP,
		.ai_flags    = AI_PASSIVE,
	};
	const char *port = "5001";
	int batch_size = 8;
	int err, fd;
	int i;

	if (argc > 1)
		port = argv[1];

	if (argc > 2) {
		char *end;
		long requested = strtol(argv[2], &end, 10);

		/*
		 * Reject non-numeric, non-positive and oversized values: the
		 * batch size dimensions the VLAs below, where a size <= 0 is
		 * undefined behaviour and a huge one overflows the stack.
		 * Must return directly here: a goto to the cleanup labels
		 * would jump into the scope of the VLAs.
		 */
		if (end == argv[2] || *end != '\0' ||
		    requested < 1 || requested > MAX_BATCH_SIZE) {
			fprintf(stderr,
				"invalid batch size '%s' (expected 1..%d)\n",
				argv[2], MAX_BATCH_SIZE);
			return EXIT_FAILURE;
		}
		batch_size = (int)requested;
	}

	char buf[batch_size][256];
	struct iovec iovec[batch_size][1];
	/* struct sockaddr is large enough here because hints forces AF_INET. */
	struct sockaddr addr[batch_size];
	struct mmsghdr datagrams[batch_size];

	err = getaddrinfo(NULL, port, &hints, &host);
	if (err != 0) {
		fprintf(stderr, "error using getaddrinfo: %s\n",
			gai_strerror(err));
		return EXIT_FAILURE;
	}

	fd = socket(host->ai_family, host->ai_socktype, host->ai_protocol);
	if (fd < 0) {
		perror("socket: ");
		goto out_freeaddrinfo;
	}

	if (bind(fd, host->ai_addr, host->ai_addrlen) < 0) {
		perror("bind: ");
		goto out_close_server;
	}

	/*
	 * Zero everything first so the fields this program does not set
	 * (msg_control, msg_controllen, msg_flags, msg_len) are not handed
	 * to the kernel as stack garbage.
	 */
	memset(datagrams, 0, sizeof(datagrams));
	for (i = 0; i < batch_size; ++i) {
		iovec[i][0].iov_base = buf[i];
		iovec[i][0].iov_len  = sizeof(buf[i]);
		datagrams[i].msg_hdr.msg_iov	 = iovec[i];
		datagrams[i].msg_hdr.msg_iovlen	 = 1;
		datagrams[i].msg_hdr.msg_name	 = &addr[i];
		datagrams[i].msg_hdr.msg_namelen = sizeof(addr[i]);
	}

	struct pollfd pfds[1] = {
		[0] = {
			.fd = fd,
			.events = POLLIN,
		},
	};

	while (1) {
		if (poll(pfds, 1, -1) < 0) {
			perror("poll: ");
			goto out_close_server;
		}

		/*
		 * msg_namelen is value-result: restore the buffer size before
		 * every call in case the previous one shrank it.
		 */
		for (i = 0; i < batch_size; ++i)
			datagrams[i].msg_hdr.msg_namelen = sizeof(addr[i]);

		int nr_datagrams = recvmmsg(fd, datagrams, batch_size,
					    MSG_DONTWAIT);

		/* Failure is reported as a negative value, not as zero. */
		if (nr_datagrams < 0) {
			perror("recvmmsg: ");
			goto out_close_server;
		}

		/* Nothing received: datagrams[0] holds no valid data. */
		if (nr_datagrams == 0)
			continue;

		printf("nr_datagrams received: %d\n", nr_datagrams);
		int peer_count = 1;
		int peer_bytes = datagrams[0].msg_len;
		for (i = 1; i < nr_datagrams; ++i) {
			/* Same source address as the previous datagram? */
			if (memcmp(datagrams[i - 1].msg_hdr.msg_name,
				   datagrams[i].msg_hdr.msg_name,
				   datagrams[i].msg_hdr.msg_namelen) == 0) {
				++peer_count;
				peer_bytes += datagrams[i].msg_len;
				continue;
			}

			/* Peer changed: flush the finished run, start anew. */
			print_stats_peer(&datagrams[i - 1],
					 peer_count, peer_bytes);
			peer_bytes = datagrams[i].msg_len;
			peer_count = 1;
		}

		/* Flush the last (possibly only) run of this batch. */
		print_stats_peer(&datagrams[nr_datagrams - 1],
				 peer_count, peer_bytes);
	}

	/* Only reached through the error gotos above. */
out_close_server:
	close(fd);
out_freeaddrinfo:
	freeaddrinfo(host);
	return EXIT_FAILURE;
}

             reply	other threads:[~2009-05-20 23:06 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-20 23:06 Arnaldo Carvalho de Melo [this message]
2009-05-21  0:30 ` [RFC 0/2] New socket API: recvmmsg David Miller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090520230642.GA5956@ghostprotocols.net \
    --to=acme@redhat.com \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=vanhoof@redhat.com \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.