netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Rusty Russell <rusty@rustcorp.com.au>
To: David Miller <davem@davemloft.net>
Cc: netdev@vger.kernel.org, virtualization@lists.linux-foundation.org
Subject: Re: [RFC] virtio: orphan skbs if we're relying on timer to free them
Date: Mon, 25 May 2009 20:31:47 +0930	[thread overview]
Message-ID: <200905252031.47868.rusty@rustcorp.com.au> (raw)
In-Reply-To: <20090521.001503.90069315.davem@davemloft.net>

[-- Attachment #1: Type: text/plain, Size: 7249 bytes --]

On Thu, 21 May 2009 04:45:03 pm David Miller wrote:
> Probably the most profitable avenue is to see if this is a real issue
> afterall (see above).  If we can get away with having the socket
> buffer represent socket --> device space only, that's the most ideal
> solution.  It will probably also improve performance a lot across the
> board, especially on NUMA/SMP boxes as our TX complete events tend to
> be in difference places than the SKB producer.

OK, hacked in skb_orphan() and couldn't see any behavior change here.

Details: I wrote a little test program to do a 1 byte ping pong on a TCP
socket while doing 1k writes to a number of udp sockets.  We never completely
starve the TCP connection, though that may be because it begins at the same
time as the others and gets some traffic before the buffers fill up.

Running it with 0 to 20 sockets, from my gigabit laptop to my 100Mbit
desktop.  Sorry about the attachment, but sometimes a graph is worth about
1k words.

Not sure why write rate shoots up on serious overload though.  Perhaps we're
discarding earlier in the path?

Cheers,
Rusty.

diff --git a/net/core/dev.c b/net/core/dev.c
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1677,6 +1677,8 @@ int dev_hard_start_xmit(struct sk_buff *
 	const struct net_device_ops *ops = dev->netdev_ops;
 	int rc;
 
+	skb_orphan(skb);
+
 	if (likely(!skb->next)) {
 		if (!list_empty(&ptype_all))
 			dev_queue_xmit_nit(skb, dev);

Sender: for i in `seq 0 20`; do ~/devel/junk/udpstress 9999 $i debussy 5; sleep 1; done
Receiver: for i in `seq 0 20`; do ./udpstress -r 9999 $i; done

/* udpstress.c: Simple server & client to test multiple UDP streams. */
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sendfile.h>
#include <sys/time.h>
#include <time.h>
#include <netinet/in.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include <err.h>
#include <netdb.h>

static volatile unsigned int bytes;

static void dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "%u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void tcp_dumpstats(int signal)
{
	char buffer[128];

	sprintf(buffer, "TCP: %u\n", bytes);
	write(STDOUT_FILENO, buffer, strlen(buffer));
	_exit(0);
}

static void udp_receiver(unsigned int port)
{
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[65536];
	int val = 1;

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	/* Read packets until we get signal. */
	signal(SIGALRM, dumpstats);

	while ((ret = read(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short read %i", ret);
}

static void udp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send UDP packets to that addr. */
	struct sockaddr_in saddr;
	int sock, ret;
	char buffer[1024];

	sock = socket(PF_INET, SOCK_DGRAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, buffer, 1) != 1)
		err(1, "Reading go signal");

	while ((ret = write(sock, buffer, sizeof(buffer))) > 0)
		bytes += ret;
	err(1, "Short write %i", ret);
}

/* Exits when tcp socket closes. */
static void tcp_receiver(unsigned int port)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;
	int val = 1;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(&val)) != 0)
		err(1, "setsockopt(SO_REUSEADDR)");

	/* Wait for incoming. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = INADDR_ANY;
	if (bind(sock, (struct sockaddr *)&saddr, sizeof(saddr)) != 0)
		err(1, "bind to port %u", port);

	if (listen(sock, 1) != 0)
		err(1, "listen");

	sock = accept(sock, NULL, NULL);
	if (sock < 0)
		err(1, "Accepting");

	while (read(sock, &c, 1) > 0) {
		bytes++;
		if (write(sock, &c, 1) != 1)
			break;
	}
}

static void tcp_client(unsigned int ip, unsigned int port, int fd)
{
	/* We're going to send TCP packets to that addr. */
	struct sockaddr_in saddr;
	int sock;
	char c;

	sock = socket(PF_INET, SOCK_STREAM, 0);
	if (sock < 0)
		err(1, "creating socket");

	/* Connect to remote. */
	saddr.sin_family = AF_INET;
	saddr.sin_port = htons(port);
	saddr.sin_addr.s_addr = ip;
	if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)))
		err(1, "connecting socket");

	/* Write packets until we get signal. */
	signal(SIGALRM, tcp_dumpstats);

	/* Wait for "go" signal. */
	if (read(fd, &c, 1) != 1)
		err(1, "Reading go signal");

	while (write(sock, &c, 1) > 0) {
		bytes++;
		if (read(sock, &c, 1) != 1)
			err(1, "Short read");
	}
	err(1, "Short write");
}

static unsigned int host_to_ipaddr(const char *name)
{
	struct hostent *host;
	unsigned int ip = 0;

	host = gethostbyname(name);
	if (!host)
		err(1, "gethostbyname(%s)", name);

	if (host->h_addrtype != AF_INET ||
	    host->h_length != sizeof(struct in_addr))
		errx(1, "gethostbyname gave wrong type");

	memcpy(&ip, host->h_addr_list[0], 4);
	return ip;
}

static void usage(void)
{
	errx(1,
	     "Usage: udpstress -r portnum number OR\n"
	     "	udpstress portnum number ipaddr timeout");
}

int main(int argc, char *argv[])
{
	unsigned int ip, i, port, kids;
	int fds[2];

	if (argc < 4)
		usage();

	if (pipe(fds) != 0)
		err(1, "Creating pipe");

	signal(SIGALRM, SIG_IGN);
	setsid();

	if (strcmp(argv[1], "-r") == 0) {
		port = atoi(argv[2]);
		kids = atoi(argv[3]);
		for (i = 0; i < kids; i++) {
			switch (fork()) {
			case 0:
				udp_receiver(port + i);
			case -1:
				err(1, "forking");
			}
		}

		/* Finally, receive TCP until the sender finishes. */
		tcp_receiver(port);
		{
			char buffer[128];

			/* Don't just use printf here; gets overwritten by
			 * other children when writing to file. */
			sprintf(buffer, "TCP: %u\n", bytes);
			write(STDOUT_FILENO, buffer, strlen(buffer));
		}
	} else {
		char buffer[kids = atoi(argv[2])+1];
		port = atoi(argv[1]);
		ip = host_to_ipaddr(argv[3]);
		for (i = 0; i < kids - 1; i++) {
			switch (fork()) {
			case 0:
				udp_client(ip, port + i, fds[0]);
			case -1:
				err(1, "forking");
			}
		}
		/* Finally, TCP sender. */
		switch (fork()) {
		case 0:
			tcp_client(ip, port, fds[0]);
		case -1:
			err(1, "forking");
		}

		/* Wait for them to settle. */
		sleep(1);

		if (write(fds[1], buffer, kids) != kids)
			errx(1, "Short write on pipe");
		sleep(atoi(argv[4]));
	}

	/* Kill kids. */
	kill(-getpid(), SIGALRM);

	/* We want to exit last. */
	for (i = 0; i < kids; i++)
		wait(NULL);
	return 0;
}


[-- Attachment #2: results.gnumeric --]
[-- Type: application/x-gnumeric, Size: 12248 bytes --]

      parent reply	other threads:[~2009-05-25 11:03 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-18 12:48 [RFC] virtio: orphan skbs if we're relying on timer to free them Rusty Russell
2009-05-19  2:40 ` David Miller
2009-05-21  6:57   ` Rusty Russell
2009-05-21  7:15     ` David Miller
2009-05-21 17:24       ` Dimitris Michailidis
2009-05-25 11:01       ` Rusty Russell [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200905252031.47868.rusty@rustcorp.com.au \
    --to=rusty@rustcorp.com.au \
    --cc=davem@davemloft.net \
    --cc=netdev@vger.kernel.org \
    --cc=virtualization@lists.linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).